Merge tag 'xfs-5.3-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 13 Jul 2019 00:17:51 +0000 (17:17 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 13 Jul 2019 00:17:51 +0000 (17:17 -0700)
Pull xfs updates from Darrick Wong:
 "In this release there are a significant number of consolidations and
  cleanups in the log code; restructuring of the log to issue struct
  bios directly; new bulkstat ioctls to return v5 fs inode information
  (and fix all the padding problems of the old ioctl); the beginnings of
  multithreaded inode walks (e.g. quotacheck); and a reduction in memory
  usage in the online scrub code leading to reduced runtimes.

   - Refactor inode geometry calculation into a single structure instead
     of open-coding pieces everywhere.

   - Add online repair to build options.

   - Remove unnecessary function call flags and functions.

   - Claim maintainership of various loose xfs documentation and header
     files.

   - Use struct bio directly for log buffer IOs instead of struct
     xfs_buf.

   - Reduce log item boilerplate code requirements.

   - Merge log item code spread across too many files.

   - Further distinguish between log item commits and cancellations.

   - Various small cleanups to the ag small allocator.

   - Support cgroup-aware writeback

   - libxfs refactoring for mkfs cleanup

   - Remove unneeded #includes

   - Fix a memory allocation miscalculation in the new log bio code

   - Fix bisection problems

   - Fix a crash in ioend processing caused by tripping over freeing of
     preallocated transactions

   - Split out a generic inode walk mechanism from the bulkstat code,
     hook up all the internal users to use the walking code, then clean
     up bulkstat to serve only the bulkstat ioctls.

   - Add a multithreaded iwalk implementation to speed up quotacheck on
     fast storage with many CPUs.

   - Remove unnecessary return values in logging teardown functions.

   - Supplement the bstat and inogrp structures with new bulkstat and
     inumbers structures that have all the fields we need for v5
     filesystem features and none of the padding problems of their
     predecessors.

   - Wire up new ioctls that use the new structures with a much simpler
     bulk_ireq structure at the head instead of the pointer-happy mess we
     had before.

   - Enable userspace to constrain bulkstat returns to a single AG or a
     single special inode so that we can phase out a lot of geometry
     guesswork in userspace.

   - Reduce memory consumption and zeroing overhead in extended
     attribute scrub code.

   - Fix some behavioral regressions in the new bulkstat backend code.

   - Fix some behavioral regressions in the new log bio code"

* tag 'xfs-5.3-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (100 commits)
  xfs: chain bios the right way around in xfs_rw_bdev
  xfs: bump INUMBERS cursor correctly in xfs_inumbers_walk
  xfs: don't update lastino for FSBULKSTAT_SINGLE
  xfs: online scrub needn't bother zeroing its temporary buffer
  xfs: only allocate memory for scrubbing attributes when we need it
  xfs: refactor attr scrub memory allocation function
  xfs: refactor extended attribute buffer pointer functions
  xfs: attribute scrub should use seen_enough to pass error values
  xfs: allow single bulkstat of special inodes
  xfs: specify AG in bulk req
  xfs: wire up the v5 inumbers ioctl
  xfs: wire up new v5 bulkstat ioctls
  xfs: introduce v5 inode group structure
  xfs: introduce new v5 bulkstat structure
  xfs: rename bulkstat functions
  xfs: remove various bulk request typedef usage
  fs: xfs: xfs_log: Change return type from int to void
  xfs: poll waiting for quotacheck
  xfs: multithreaded iwalk implementation
  xfs: refactor INUMBERS to use iwalk functions
  ...

162 files changed:
Documentation/filesystems/xfs-self-describing-metadata.txt
MAINTAINERS
fs/xfs/Makefile
fs/xfs/kmem.c
fs/xfs/kmem.h
fs/xfs/libxfs/xfs_ag.c
fs/xfs/libxfs/xfs_ag_resv.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_alloc_btree.c
fs/xfs/libxfs/xfs_attr.c
fs/xfs/libxfs/xfs_attr.h
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_attr_remote.c
fs/xfs/libxfs/xfs_bit.c
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap_btree.c
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_btree.h
fs/xfs/libxfs/xfs_da_btree.c
fs/xfs/libxfs/xfs_da_format.c
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_dir2.c
fs/xfs/libxfs/xfs_dir2_block.c
fs/xfs/libxfs/xfs_dir2_data.c
fs/xfs/libxfs/xfs_dir2_leaf.c
fs/xfs/libxfs/xfs_dir2_node.c
fs/xfs/libxfs/xfs_dir2_sf.c
fs/xfs/libxfs/xfs_dquot_buf.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_fs.h
fs/xfs/libxfs/xfs_health.h
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc.h
fs/xfs/libxfs/xfs_ialloc_btree.c
fs/xfs/libxfs/xfs_ialloc_btree.h
fs/xfs/libxfs/xfs_iext_tree.c
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/libxfs/xfs_inode_fork.c
fs/xfs/libxfs/xfs_log_rlimit.c
fs/xfs/libxfs/xfs_refcount.c
fs/xfs/libxfs/xfs_refcount_btree.c
fs/xfs/libxfs/xfs_rmap.c
fs/xfs/libxfs/xfs_rmap_btree.c
fs/xfs/libxfs/xfs_rtbitmap.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/libxfs/xfs_shared.h
fs/xfs/libxfs/xfs_symlink_remote.c
fs/xfs/libxfs/xfs_trans_resv.c
fs/xfs/libxfs/xfs_trans_space.h
fs/xfs/libxfs/xfs_types.c
fs/xfs/scrub/agheader.c
fs/xfs/scrub/agheader_repair.c
fs/xfs/scrub/alloc.c
fs/xfs/scrub/attr.c
fs/xfs/scrub/attr.h [new file with mode: 0644]
fs/xfs/scrub/bitmap.c
fs/xfs/scrub/bmap.c
fs/xfs/scrub/btree.c
fs/xfs/scrub/common.c
fs/xfs/scrub/dabtree.c
fs/xfs/scrub/dir.c
fs/xfs/scrub/fscounters.c
fs/xfs/scrub/health.c
fs/xfs/scrub/ialloc.c
fs/xfs/scrub/inode.c
fs/xfs/scrub/parent.c
fs/xfs/scrub/quota.c
fs/xfs/scrub/refcount.c
fs/xfs/scrub/repair.c
fs/xfs/scrub/rmap.c
fs/xfs/scrub/rtbitmap.c
fs/xfs/scrub/scrub.c
fs/xfs/scrub/symlink.c
fs/xfs/scrub/trace.c
fs/xfs/xfs_acl.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_aops.h
fs/xfs/xfs_attr_inactive.c
fs/xfs/xfs_attr_list.c
fs/xfs/xfs_bio_io.c [new file with mode: 0644]
fs/xfs/xfs_bmap_item.c
fs/xfs/xfs_bmap_item.h
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_buf_item.h
fs/xfs/xfs_dir2_readdir.c
fs/xfs/xfs_discard.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_dquot.h
fs/xfs/xfs_dquot_item.c
fs/xfs/xfs_dquot_item.h
fs/xfs/xfs_error.c
fs/xfs/xfs_export.c
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_extfree_item.h
fs/xfs/xfs_file.c
fs/xfs/xfs_filestream.c
fs/xfs/xfs_fsmap.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_globals.c
fs/xfs/xfs_health.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_icreate_item.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_inode_item.h
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_ioctl.h
fs/xfs/xfs_ioctl32.c
fs/xfs/xfs_ioctl32.h
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_itable.c
fs/xfs/xfs_itable.h
fs/xfs/xfs_iwalk.c [new file with mode: 0644]
fs/xfs/xfs_iwalk.h [new file with mode: 0644]
fs/xfs/xfs_linux.h
fs/xfs/xfs_log.c
fs/xfs/xfs_log.h
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_message.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_ondisk.h
fs/xfs/xfs_pnfs.c
fs/xfs/xfs_pwork.c [new file with mode: 0644]
fs/xfs/xfs_pwork.h [new file with mode: 0644]
fs/xfs/xfs_qm.c
fs/xfs/xfs_qm_bhv.c
fs/xfs/xfs_qm_syscalls.c
fs/xfs/xfs_quotaops.c
fs/xfs/xfs_refcount_item.c
fs/xfs/xfs_refcount_item.h
fs/xfs/xfs_reflink.c
fs/xfs/xfs_rmap_item.c
fs/xfs/xfs_rmap_item.h
fs/xfs/xfs_rtalloc.c
fs/xfs/xfs_stats.c
fs/xfs/xfs_super.c
fs/xfs/xfs_super.h
fs/xfs/xfs_symlink.c
fs/xfs/xfs_sysctl.c
fs/xfs/xfs_sysctl.h
fs/xfs/xfs_sysfs.c
fs/xfs/xfs_trace.c
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_ail.c
fs/xfs/xfs_trans_bmap.c [deleted file]
fs/xfs/xfs_trans_buf.c
fs/xfs/xfs_trans_dquot.c
fs/xfs/xfs_trans_extfree.c [deleted file]
fs/xfs/xfs_trans_inode.c
fs/xfs/xfs_trans_priv.h
fs/xfs/xfs_trans_refcount.c [deleted file]
fs/xfs/xfs_trans_rmap.c [deleted file]
fs/xfs/xfs_xattr.c

index 68604e67a495fce4db18e0c76e58237c73080854..8db0121d0980c4b7293f76eb8331d09162cc21e9 100644 (file)
@@ -222,7 +222,7 @@ static void
 xfs_foo_read_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
 
         if ((xfs_sb_version_hascrc(&mp->m_sb) &&
              !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
@@ -245,7 +245,7 @@ static bool
 xfs_foo_verify(
        struct xfs_buf          *bp)
 {
-        struct xfs_mount       *mp = bp->b_target->bt_mount;
+        struct xfs_mount       *mp = bp->b_mount;
         struct xfs_ondisk_hdr  *hdr = bp->b_addr;
 
         if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
@@ -272,7 +272,7 @@ static bool
 xfs_foo_verify(
        struct xfs_buf          *bp)
 {
-        struct xfs_mount       *mp = bp->b_target->bt_mount;
+        struct xfs_mount       *mp = bp->b_mount;
         struct xfs_ondisk_hdr  *hdr = bp->b_addr;
 
         if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) {
@@ -297,7 +297,7 @@ static void
 xfs_foo_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_fspriv;
 
        if (!xfs_foo_verify(bp)) {
index c7d42239d8d9639ed6725c2784c3f01c23f3799e..1be025959be9ea58580a9cf133d995a561606200 100644 (file)
@@ -17544,7 +17544,13 @@ W:     http://xfs.org/
 T:     git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
 S:     Supported
 F:     Documentation/filesystems/xfs.txt
+F:     Documentation/ABI/testing/sysfs-fs-xfs
+F:     Documentation/filesystems/xfs.txt
+F:     Documentation/filesystems/xfs-delayed-logging-design.txt
+F:     Documentation/filesystems/xfs-self-describing-metadata.txt
 F:     fs/xfs/
+F:     include/uapi/linux/dqblk_xfs.h
+F:     include/uapi/linux/fsmap.h
 
 XILINX AXI ETHERNET DRIVER
 M:     Anirudha Sarangi <anirudh@xilinx.com>
index 91831975363b927687acab64d73b3f76f10e81b2..b74a471692970b825706fd96843c402597d509fc 100644 (file)
@@ -62,6 +62,7 @@ xfs-y                         += xfs_aops.o \
                                   xfs_attr_inactive.o \
                                   xfs_attr_list.o \
                                   xfs_bmap_util.o \
+                                  xfs_bio_io.o \
                                   xfs_buf.o \
                                   xfs_dir2_readdir.o \
                                   xfs_discard.o \
@@ -80,9 +81,11 @@ xfs-y                                += xfs_aops.o \
                                   xfs_iops.o \
                                   xfs_inode.o \
                                   xfs_itable.o \
+                                  xfs_iwalk.o \
                                   xfs_message.o \
                                   xfs_mount.o \
                                   xfs_mru_cache.o \
+                                  xfs_pwork.o \
                                   xfs_reflink.o \
                                   xfs_stats.o \
                                   xfs_super.o \
@@ -104,12 +107,8 @@ xfs-y                              += xfs_log.o \
                                   xfs_rmap_item.o \
                                   xfs_log_recover.o \
                                   xfs_trans_ail.o \
-                                  xfs_trans_bmap.o \
                                   xfs_trans_buf.o \
-                                  xfs_trans_extfree.o \
-                                  xfs_trans_inode.o \
-                                  xfs_trans_refcount.o \
-                                  xfs_trans_rmap.o \
+                                  xfs_trans_inode.o
 
 # optional features
 xfs-$(CONFIG_XFS_QUOTA)                += xfs_dquot.o \
index fdd9d6ede25ca74065c2b092c742031068270aae..16bb9a3286781fa41ad319dcbecc99f4af4251fa 100644 (file)
@@ -3,12 +3,7 @@
  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
  * All Rights Reserved.
  */
-#include <linux/mm.h>
 #include <linux/sched/mm.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include "kmem.h"
 #include "xfs_message.h"
index 8e6b3ba81c03e398d2b91c76c456760c2a97ae82..267655acd42681e0cec5253e94d05681bb9f2c13 100644 (file)
@@ -124,4 +124,12 @@ kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags)
        return kmem_zone_alloc(zone, flags | KM_ZERO);
 }
 
+static inline struct page *
+kmem_to_page(void *addr)
+{
+       if (is_vmalloc_addr(addr))
+               return vmalloc_to_page(addr);
+       return virt_to_page(addr);
+}
+
 #endif /* __XFS_SUPPORT_KMEM_H__ */
index b0c89f54d1bb05f822f3148daec8d56800f62c28..5de296b34ab1f618ea489225cbcd054bfd9aa7f9 100644 (file)
@@ -10,6 +10,7 @@
 #include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
+#include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_btree.h"
@@ -44,6 +45,12 @@ xfs_get_aghdr_buf(
        return bp;
 }
 
+static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id)
+{
+       return mp->m_sb.sb_logstart > 0 &&
+              id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
+}
+
 /*
  * Generic btree root block init function
  */
@@ -53,40 +60,85 @@ xfs_btroot_init(
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
 {
-       xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno, 0);
+       xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno);
 }
 
-/*
- * Alloc btree root block init functions
- */
+/* Finish initializing a free space btree. */
 static void
-xfs_bnoroot_init(
+xfs_freesp_init_recs(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
 {
        struct xfs_alloc_rec    *arec;
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 
-       xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0);
        arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
        arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
+
+       if (is_log_ag(mp, id)) {
+               struct xfs_alloc_rec    *nrec;
+               xfs_agblock_t           start = XFS_FSB_TO_AGBNO(mp,
+                                                       mp->m_sb.sb_logstart);
+
+               ASSERT(start >= mp->m_ag_prealloc_blocks);
+               if (start != mp->m_ag_prealloc_blocks) {
+                       /*
+                        * Modify first record to pad stripe align of log
+                        */
+                       arec->ar_blockcount = cpu_to_be32(start -
+                                               mp->m_ag_prealloc_blocks);
+                       nrec = arec + 1;
+
+                       /*
+                        * Insert second record at start of internal log
+                        * which then gets trimmed.
+                        */
+                       nrec->ar_startblock = cpu_to_be32(
+                                       be32_to_cpu(arec->ar_startblock) +
+                                       be32_to_cpu(arec->ar_blockcount));
+                       arec = nrec;
+                       be16_add_cpu(&block->bb_numrecs, 1);
+               }
+               /*
+                * Change record start to after the internal log
+                */
+               be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks);
+       }
+
+       /*
+        * Calculate the record block count and check for the case where
+        * the log might have consumed all available space in the AG. If
+        * so, reset the record count to 0 to avoid exposure of an invalid
+        * record start block.
+        */
        arec->ar_blockcount = cpu_to_be32(id->agsize -
                                          be32_to_cpu(arec->ar_startblock));
+       if (!arec->ar_blockcount)
+               block->bb_numrecs = 0;
 }
 
+/*
+ * Alloc btree root block init functions
+ */
 static void
-xfs_cntroot_init(
+xfs_bnoroot_init(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
 {
-       struct xfs_alloc_rec    *arec;
+       xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
+       xfs_freesp_init_recs(mp, bp, id);
+}
 
-       xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0);
-       arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
-       arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
-       arec->ar_blockcount = cpu_to_be32(id->agsize -
-                                         be32_to_cpu(arec->ar_startblock));
+static void
+xfs_cntroot_init(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp,
+       struct aghdr_init_data  *id)
+{
+       xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
+       xfs_freesp_init_recs(mp, bp, id);
 }
 
 /*
@@ -101,7 +153,7 @@ xfs_rmaproot_init(
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_rmap_rec     *rrec;
 
-       xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0);
+       xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno);
 
        /*
         * mark the AG header regions as static metadata The BNO
@@ -149,6 +201,18 @@ xfs_rmaproot_init(
                rrec->rm_offset = 0;
                be16_add_cpu(&block->bb_numrecs, 1);
        }
+
+       /* account for the log space */
+       if (is_log_ag(mp, id)) {
+               rrec = XFS_RMAP_REC_ADDR(block,
+                               be16_to_cpu(block->bb_numrecs) + 1);
+               rrec->rm_startblock = cpu_to_be32(
+                               XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart));
+               rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks);
+               rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
+               rrec->rm_offset = 0;
+               be16_add_cpu(&block->bb_numrecs, 1);
+       }
 }
 
 /*
@@ -209,6 +273,14 @@ xfs_agfblock_init(
                agf->agf_refcount_level = cpu_to_be32(1);
                agf->agf_refcount_blocks = cpu_to_be32(1);
        }
+
+       if (is_log_ag(mp, id)) {
+               int64_t logblocks = mp->m_sb.sb_logblocks;
+
+               be32_add_cpu(&agf->agf_freeblks, -logblocks);
+               agf->agf_longest = cpu_to_be32(id->agsize -
+                       XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks);
+       }
 }
 
 static void
index e2ba2a3b63b20a6378283e35e1c58c939f1d2476..87a9747f1d36b905755070d9cd85f15827d9afb7 100644 (file)
@@ -9,20 +9,12 @@
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_alloc.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
 #include "xfs_trans.h"
-#include "xfs_bit.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ag_resv.h"
-#include "xfs_trans_space.h"
 #include "xfs_rmap_btree.h"
 #include "xfs_btree.h"
 #include "xfs_refcount_btree.h"
index a9ff3cf82cce0bb0e96bfed46cac5333a6be36ec..372ad55631fc447190e3eafeb47aca5d7a2a1a9a 100644 (file)
@@ -13,7 +13,6 @@
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
-#include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_rmap.h"
 #include "xfs_alloc_btree.h"
@@ -21,7 +20,6 @@
 #include "xfs_extent_busy.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
-#include "xfs_cksum.h"
 #include "xfs_trace.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
@@ -41,8 +39,6 @@ struct workqueue_struct *xfs_alloc_wq;
 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
-STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
-               xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
 
 /*
  * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of slots in
@@ -555,7 +551,7 @@ static xfs_failaddr_t
 xfs_agfl_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
        struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
        int             i;
 
@@ -596,7 +592,7 @@ static void
 xfs_agfl_read_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
        xfs_failaddr_t  fa;
 
        /*
@@ -621,7 +617,7 @@ static void
 xfs_agfl_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_log_item;
        xfs_failaddr_t          fa;
 
@@ -699,6 +695,107 @@ xfs_alloc_update_counters(
  * Allocation group level functions.
  */
 
+/*
+ * Deal with the case where only small freespaces remain. Either return the
+ * contents of the last freespace record, or allocate space from the freelist if
+ * there is nothing in the tree.
+ */
+STATIC int                     /* error */
+xfs_alloc_ag_vextent_small(
+       struct xfs_alloc_arg    *args,  /* allocation argument structure */
+       struct xfs_btree_cur    *ccur,  /* optional by-size cursor */
+       xfs_agblock_t           *fbnop, /* result block number */
+       xfs_extlen_t            *flenp, /* result length */
+       int                     *stat)  /* status: 0-freelist, 1-normal/none */
+{
+       int                     error = 0;
+       xfs_agblock_t           fbno = NULLAGBLOCK;
+       xfs_extlen_t            flen = 0;
+       int                     i = 0;
+
+       /*
+        * If a cntbt cursor is provided, try to allocate the largest record in
+        * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
+        * allocation. Make sure to respect minleft even when pulling from the
+        * freelist.
+        */
+       if (ccur)
+               error = xfs_btree_decrement(ccur, 0, &i);
+       if (error)
+               goto error;
+       if (i) {
+               error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
+               if (error)
+                       goto error;
+               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error);
+               goto out;
+       }
+
+       if (args->minlen != 1 || args->alignment != 1 ||
+           args->resv == XFS_AG_RESV_AGFL ||
+           (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <=
+            args->minleft))
+               goto out;
+
+       error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
+       if (error)
+               goto error;
+       if (fbno == NULLAGBLOCK)
+               goto out;
+
+       xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
+                             xfs_alloc_allow_busy_reuse(args->datatype));
+
+       if (xfs_alloc_is_userdata(args->datatype)) {
+               struct xfs_buf  *bp;
+
+               bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno);
+               if (!bp) {
+                       error = -EFSCORRUPTED;
+                       goto error;
+               }
+               xfs_trans_binval(args->tp, bp);
+       }
+       *fbnop = args->agbno = fbno;
+       *flenp = args->len = 1;
+       XFS_WANT_CORRUPTED_GOTO(args->mp,
+               fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
+               error);
+       args->wasfromfl = 1;
+       trace_xfs_alloc_small_freelist(args);
+
+       /*
+        * If we're feeding an AGFL block to something that doesn't live in the
+        * free space, we need to clear out the OWN_AG rmap.
+        */
+       error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
+                             &XFS_RMAP_OINFO_AG);
+       if (error)
+               goto error;
+
+       *stat = 0;
+       return 0;
+
+out:
+       /*
+        * Can't do the allocation, give up.
+        */
+       if (flen < args->minlen) {
+               args->agbno = NULLAGBLOCK;
+               trace_xfs_alloc_small_notenough(args);
+               flen = 0;
+       }
+       *fbnop = fbno;
+       *flenp = flen;
+       *stat = 1;
+       trace_xfs_alloc_small_done(args);
+       return 0;
+
+error:
+       trace_xfs_alloc_small_error(args);
+       return error;
+}
+
 /*
  * Allocate a variable extent in the allocation group agno.
  * Type and bno are used to determine where in the allocation group the
@@ -1582,112 +1679,6 @@ out_nominleft:
        return 0;
 }
 
-/*
- * Deal with the case where only small freespaces remain.
- * Either return the contents of the last freespace record,
- * or allocate space from the freelist if there is nothing in the tree.
- */
-STATIC int                     /* error */
-xfs_alloc_ag_vextent_small(
-       xfs_alloc_arg_t *args,  /* allocation argument structure */
-       xfs_btree_cur_t *ccur,  /* by-size cursor */
-       xfs_agblock_t   *fbnop, /* result block number */
-       xfs_extlen_t    *flenp, /* result length */
-       int             *stat)  /* status: 0-freelist, 1-normal/none */
-{
-       int             error;
-       xfs_agblock_t   fbno;
-       xfs_extlen_t    flen;
-       int             i;
-
-       if ((error = xfs_btree_decrement(ccur, 0, &i)))
-               goto error0;
-       if (i) {
-               if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-       }
-       /*
-        * Nothing in the btree, try the freelist.  Make sure
-        * to respect minleft even when pulling from the
-        * freelist.
-        */
-       else if (args->minlen == 1 && args->alignment == 1 &&
-                args->resv != XFS_AG_RESV_AGFL &&
-                (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
-                 > args->minleft)) {
-               error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
-               if (error)
-                       goto error0;
-               if (fbno != NULLAGBLOCK) {
-                       xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
-                             xfs_alloc_allow_busy_reuse(args->datatype));
-
-                       if (xfs_alloc_is_userdata(args->datatype)) {
-                               xfs_buf_t       *bp;
-
-                               bp = xfs_btree_get_bufs(args->mp, args->tp,
-                                       args->agno, fbno, 0);
-                               if (!bp) {
-                                       error = -EFSCORRUPTED;
-                                       goto error0;
-                               }
-                               xfs_trans_binval(args->tp, bp);
-                       }
-                       args->len = 1;
-                       args->agbno = fbno;
-                       XFS_WANT_CORRUPTED_GOTO(args->mp,
-                               args->agbno + args->len <=
-                               be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
-                               error0);
-                       args->wasfromfl = 1;
-                       trace_xfs_alloc_small_freelist(args);
-
-                       /*
-                        * If we're feeding an AGFL block to something that
-                        * doesn't live in the free space, we need to clear
-                        * out the OWN_AG rmap.
-                        */
-                       error = xfs_rmap_free(args->tp, args->agbp, args->agno,
-                                       fbno, 1, &XFS_RMAP_OINFO_AG);
-                       if (error)
-                               goto error0;
-
-                       *stat = 0;
-                       return 0;
-               }
-               /*
-                * Nothing in the freelist.
-                */
-               else
-                       flen = 0;
-       }
-       /*
-        * Can't allocate from the freelist for some reason.
-        */
-       else {
-               fbno = NULLAGBLOCK;
-               flen = 0;
-       }
-       /*
-        * Can't do the allocation, give up.
-        */
-       if (flen < args->minlen) {
-               args->agbno = NULLAGBLOCK;
-               trace_xfs_alloc_small_notenough(args);
-               flen = 0;
-       }
-       *fbnop = fbno;
-       *flenp = flen;
-       *stat = 1;
-       trace_xfs_alloc_small_done(args);
-       return 0;
-
-error0:
-       trace_xfs_alloc_small_error(args);
-       return error;
-}
-
 /*
  * Free the extent starting at agno/bno for length.
  */
@@ -2095,7 +2086,7 @@ xfs_free_agfl_block(
        if (error)
                return error;
 
-       bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0);
+       bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno);
        if (!bp)
                return -EFSCORRUPTED;
        xfs_trans_binval(tp, bp);
@@ -2586,7 +2577,7 @@ static xfs_failaddr_t
 xfs_agf_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_agf          *agf = XFS_BUF_TO_AGF(bp);
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
@@ -2644,7 +2635,7 @@ static void
 xfs_agf_read_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
        xfs_failaddr_t  fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -2661,7 +2652,7 @@ static void
 xfs_agf_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_log_item;
        xfs_failaddr_t          fa;
 
@@ -3146,7 +3137,7 @@ xfs_alloc_has_record(
 
 /*
  * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
- * error code or XFS_BTREE_QUERY_RANGE_ABORT.
+ * error code or XFS_ITER_*.
  */
 int
 xfs_agfl_walk(
index 9fe949f6055ec32e89e08d3cf01608cdbb678f42..2a94543857a195e55cd96bc2026075ec7559dc7b 100644 (file)
@@ -17,7 +17,6 @@
 #include "xfs_extent_busy.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
 #include "xfs_trans.h"
 
 
@@ -292,7 +291,7 @@ static xfs_failaddr_t
 xfs_allocbt_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_perag        *pag = bp->b_pag;
        xfs_failaddr_t          fa;
index c441f41f14e8ffc827fe0b1e1f1245779aa27a86..d48fcf11cc35a40616423f8de015b60817f2b74a 100644 (file)
@@ -9,23 +9,18 @@
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
 #include "xfs_attr_sf.h"
 #include "xfs_inode.h"
-#include "xfs_alloc.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_attr_remote.h"
-#include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
 #include "xfs_trace.h"
index 3b0dce06e454f265eb7e97bb5712f0e10639e286..ff28ebf3b635d1940f1306006623a072602ecd5f 100644 (file)
@@ -112,7 +112,13 @@ typedef struct xfs_attr_list_context {
        struct xfs_inode                *dp;            /* inode */
        struct attrlist_cursor_kern     *cursor;        /* position in list */
        char                            *alist;         /* output buffer */
-       int                             seen_enough;    /* T/F: seen enough of list? */
+
+       /*
+        * Abort attribute list iteration if non-zero.  Can be used to pass
+        * error values to the xfs_attr_list caller.
+        */
+       int                             seen_enough;
+
        ssize_t                         count;          /* num used entries */
        int                             dupcnt;         /* count dup hashvals seen */
        int                             bufsize;        /* total buffer size */
index 1f6e3965ff7425456ca64477a713573cb5e7943a..70eb941d02e4dc406c3d16fc01cab71e9e68472f 100644 (file)
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_bmap.h"
 #include "xfs_attr_sf.h"
@@ -27,7 +25,6 @@
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_buf_item.h"
-#include "xfs_cksum.h"
 #include "xfs_dir2.h"
 #include "xfs_log.h"
 
@@ -240,7 +237,7 @@ xfs_attr3_leaf_verify(
        struct xfs_buf                  *bp)
 {
        struct xfs_attr3_icleaf_hdr     ichdr;
-       struct xfs_mount                *mp = bp->b_target->bt_mount;
+       struct xfs_mount                *mp = bp->b_mount;
        struct xfs_attr_leafblock       *leaf = bp->b_addr;
        struct xfs_attr_leaf_entry      *entries;
        uint32_t                        end;    /* must be 32bit - see below */
@@ -313,7 +310,7 @@ static void
 xfs_attr3_leaf_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
        xfs_failaddr_t          fa;
@@ -343,7 +340,7 @@ static void
 xfs_attr3_leaf_read_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        xfs_failaddr_t          fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -865,7 +862,7 @@ xfs_attr_shortform_allfit(
        struct xfs_attr3_icleaf_hdr leafhdr;
        int                     bytes;
        int                     i;
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
 
        leaf = bp->b_addr;
        xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
@@ -1525,7 +1522,7 @@ xfs_attr_leaf_order(
 {
        struct xfs_attr3_icleaf_hdr ichdr1;
        struct xfs_attr3_icleaf_hdr ichdr2;
-       struct xfs_mount *mp = leaf1_bp->b_target->bt_mount;
+       struct xfs_mount *mp = leaf1_bp->b_mount;
 
        xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr);
        xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr);
@@ -2568,7 +2565,7 @@ xfs_attr_leaf_lasthash(
 {
        struct xfs_attr3_icleaf_hdr ichdr;
        struct xfs_attr_leaf_entry *entries;
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
 
        xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr);
        entries = xfs_attr3_leaf_entryp(bp->b_addr);
index 65ff600a8067875f3d898481e1ef2c271d55bdd9..4eb30d3570457a1d3aab5b5b3ba2c18b3d62d890 100644 (file)
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
 #include "xfs_inode.h"
-#include "xfs_alloc.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
 #include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_attr_remote.h"
-#include "xfs_trans_space.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
-#include "xfs_buf_item.h"
 #include "xfs_error.h"
 
 #define ATTR_RMTVALUE_MAPSIZE  1       /* # of map entries at once */
@@ -111,7 +103,7 @@ __xfs_attr3_rmt_read_verify(
        bool            check_crc,
        xfs_failaddr_t  *failaddr)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
        char            *ptr;
        int             len;
        xfs_daddr_t     bno;
@@ -175,7 +167,7 @@ static void
 xfs_attr3_rmt_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
        xfs_failaddr_t  fa;
        int             blksize = mp->m_attr_geo->blksize;
        char            *ptr;
@@ -535,7 +527,7 @@ xfs_attr_rmtval_set(
                dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
                dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
 
-               bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
+               bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt);
                if (!bp)
                        return -ENOMEM;
                bp->b_ops = &xfs_attr3_rmt_buf_ops;
index 40ce5f3094d19d399bca5758b428177dcadd7824..7071ff98fdbc8e569c4a274bd9c3ccbfa8a5ffc9 100644 (file)
@@ -5,7 +5,6 @@
  */
 #include "xfs.h"
 #include "xfs_log_format.h"
-#include "xfs_bit.h"
 
 /*
  * XFS bit manipulation routines, used in non-realtime code.
index 356ebd1cbe82518c9898c2bb1a2befa0f5206c5b..baf0b72c0a37deddd4936556a985cb020bf5ff4d 100644 (file)
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir2.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_extfree_item.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
@@ -32,7 +28,6 @@
 #include "xfs_trans_space.h"
 #include "xfs_buf_item.h"
 #include "xfs_trace.h"
-#include "xfs_symlink.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_filestream.h"
 #include "xfs_rmap.h"
@@ -370,7 +365,7 @@ xfs_bmap_check_leaf_extents(
                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
                if (!bp) {
                        bp_release = 1;
-                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+                       error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
                                                XFS_BMAP_BTREE_REF,
                                                &xfs_bmbt_buf_ops);
                        if (error)
@@ -454,7 +449,7 @@ xfs_bmap_check_leaf_extents(
                bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
                if (!bp) {
                        bp_release = 1;
-                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+                       error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
                                                XFS_BMAP_BTREE_REF,
                                                &xfs_bmbt_buf_ops);
                        if (error)
@@ -619,7 +614,7 @@ xfs_bmap_btree_to_extents(
        XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
                        xfs_btree_check_lptr(cur, cbno, 1));
 #endif
-       error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
+       error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
                                &xfs_bmbt_buf_ops);
        if (error)
                return error;
@@ -732,7 +727,7 @@ xfs_bmap_extents_to_btree(
        cur->bc_private.b.allocated++;
        ip->i_d.di_nblocks++;
        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
-       abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
+       abp = xfs_btree_get_bufl(mp, tp, args.fsbno);
        if (!abp) {
                error = -EFSCORRUPTED;
                goto out_unreserve_dquot;
@@ -878,7 +873,7 @@ xfs_bmap_local_to_extents(
        ASSERT(args.fsbno != NULLFSBLOCK);
        ASSERT(args.len == 1);
        tp->t_firstblock = args.fsbno;
-       bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
+       bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno);
 
        /*
         * Initialize the block, copy the data and log the remote buffer.
@@ -1203,7 +1198,7 @@ xfs_iread_extents(
         * pointer (leftmost) at each level.
         */
        while (level-- > 0) {
-               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+               error = xfs_btree_read_bufl(mp, tp, bno, &bp,
                                XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
                if (error)
                        goto out;
@@ -1276,7 +1271,7 @@ xfs_iread_extents(
                 */
                if (bno == NULLFSBLOCK)
                        break;
-               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+               error = xfs_btree_read_bufl(mp, tp, bno, &bp,
                                XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
                if (error)
                        goto out;
index aff82ed112c93c26f43bed5ada5fd4b82e4e3711..fbb18ba5d90538af2a34361981248e0a16278d7c 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
 #include "xfs_alloc.h"
 #include "xfs_btree.h"
 #include "xfs_bmap_btree.h"
@@ -22,7 +20,6 @@
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
 #include "xfs_rmap.h"
 
 /*
@@ -411,7 +408,7 @@ static xfs_failaddr_t
 xfs_bmbt_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        xfs_failaddr_t          fa;
        unsigned int            level;
index bbdae2b4559fc91d0e7f650fcfe6ed81868b5512..f1048efa4268053e3dbb39d6c9db1b685bbd7446 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
 #include "xfs_buf_item.h"
 #include "xfs_btree.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
 #include "xfs_alloc.h"
 #include "xfs_log.h"
 
@@ -276,7 +273,7 @@ xfs_btree_lblock_calc_crc(
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_buf_log_item *bip = bp->b_log_item;
 
-       if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+       if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
                return;
        if (bip)
                block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -288,7 +285,7 @@ xfs_btree_lblock_verify_crc(
        struct xfs_buf          *bp)
 {
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
@@ -314,7 +311,7 @@ xfs_btree_sblock_calc_crc(
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_buf_log_item *bip = bp->b_log_item;
 
-       if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+       if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
                return;
        if (bip)
                block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -326,7 +323,7 @@ xfs_btree_sblock_verify_crc(
        struct xfs_buf          *bp)
 {
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
@@ -691,14 +688,13 @@ xfs_buf_t *                               /* buffer for fsbno */
 xfs_btree_get_bufl(
        xfs_mount_t     *mp,            /* file system mount point */
        xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_fsblock_t   fsbno,          /* file system block number */
-       uint            lock)           /* lock flags for get_buf */
+       xfs_fsblock_t   fsbno)          /* file system block number */
 {
        xfs_daddr_t             d;              /* real disk block address */
 
        ASSERT(fsbno != NULLFSBLOCK);
        d = XFS_FSB_TO_DADDR(mp, fsbno);
-       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
 }
 
 /*
@@ -710,15 +706,14 @@ xfs_btree_get_bufs(
        xfs_mount_t     *mp,            /* file system mount point */
        xfs_trans_t     *tp,            /* transaction pointer */
        xfs_agnumber_t  agno,           /* allocation group number */
-       xfs_agblock_t   agbno,          /* allocation group block number */
-       uint            lock)           /* lock flags for get_buf */
+       xfs_agblock_t   agbno)          /* allocation group block number */
 {
        xfs_daddr_t             d;              /* real disk block address */
 
        ASSERT(agno != NULLAGNUMBER);
        ASSERT(agbno != NULLAGBLOCK);
        d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
 }
 
 /*
@@ -845,7 +840,6 @@ xfs_btree_read_bufl(
        struct xfs_mount        *mp,            /* file system mount point */
        struct xfs_trans        *tp,            /* transaction pointer */
        xfs_fsblock_t           fsbno,          /* file system block number */
-       uint                    lock,           /* lock flags for read_buf */
        struct xfs_buf          **bpp,          /* buffer for fsbno */
        int                     refval,         /* ref count value for buffer */
        const struct xfs_buf_ops *ops)
@@ -858,7 +852,7 @@ xfs_btree_read_bufl(
                return -EFSCORRUPTED;
        d = XFS_FSB_TO_DADDR(mp, fsbno);
        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
-                                  mp->m_bsize, lock, &bp, ops);
+                                  mp->m_bsize, 0, &bp, ops);
        if (error)
                return error;
        if (bp)
@@ -1185,11 +1179,10 @@ xfs_btree_init_block(
        xfs_btnum_t     btnum,
        __u16           level,
        __u16           numrecs,
-       __u64           owner,
-       unsigned int    flags)
+       __u64           owner)
 {
        xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
-                                btnum, level, numrecs, owner, flags);
+                                btnum, level, numrecs, owner, 0);
 }
 
 STATIC void
@@ -1288,7 +1281,6 @@ STATIC int
 xfs_btree_get_buf_block(
        struct xfs_btree_cur    *cur,
        union xfs_btree_ptr     *ptr,
-       int                     flags,
        struct xfs_btree_block  **block,
        struct xfs_buf          **bpp)
 {
@@ -1296,14 +1288,11 @@ xfs_btree_get_buf_block(
        xfs_daddr_t             d;
        int                     error;
 
-       /* need to sort out how callers deal with failures first */
-       ASSERT(!(flags & XBF_TRYLOCK));
-
        error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
        if (error)
                return error;
        *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
-                                mp->m_bsize, flags);
+                                mp->m_bsize, 0);
 
        if (!*bpp)
                return -ENOMEM;
@@ -2706,7 +2695,7 @@ __xfs_btree_split(
        XFS_BTREE_STATS_INC(cur, alloc);
 
        /* Set up the new block as "right". */
-       error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
+       error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
        if (error)
                goto error0;
 
@@ -2961,7 +2950,7 @@ xfs_btree_new_iroot(
        XFS_BTREE_STATS_INC(cur, alloc);
 
        /* Copy the root into a real block. */
-       error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
+       error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
        if (error)
                goto error0;
 
@@ -3058,7 +3047,7 @@ xfs_btree_new_root(
        XFS_BTREE_STATS_INC(cur, alloc);
 
        /* Set up the new block. */
-       error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
+       error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
        if (error)
                goto error0;
 
@@ -4433,7 +4422,7 @@ xfs_btree_lblock_v5hdr_verify(
        struct xfs_buf          *bp,
        uint64_t                owner)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 
        if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -4454,7 +4443,7 @@ xfs_btree_lblock_verify(
        struct xfs_buf          *bp,
        unsigned int            max_recs)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
 
        /* numrecs verification */
@@ -4484,7 +4473,7 @@ xfs_failaddr_t
 xfs_btree_sblock_v5hdr_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_perag        *pag = bp->b_pag;
 
@@ -4510,7 +4499,7 @@ xfs_btree_sblock_verify(
        struct xfs_buf          *bp,
        unsigned int            max_recs)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        xfs_agblock_t           agno;
 
index e3b3e9dce5da3b24caaa52389f985fc571803b4a..fa3cd8ab9aba34aa1a16b3352ea0d368fb1026aa 100644 (file)
@@ -301,8 +301,7 @@ struct xfs_buf *                            /* buffer for fsbno */
 xfs_btree_get_bufl(
        struct xfs_mount        *mp,    /* file system mount point */
        struct xfs_trans        *tp,    /* transaction pointer */
-       xfs_fsblock_t           fsbno,  /* file system block number */
-       uint                    lock);  /* lock flags for get_buf */
+       xfs_fsblock_t           fsbno); /* file system block number */
 
 /*
  * Get a buffer for the block, return it with no data read.
@@ -313,8 +312,7 @@ xfs_btree_get_bufs(
        struct xfs_mount        *mp,    /* file system mount point */
        struct xfs_trans        *tp,    /* transaction pointer */
        xfs_agnumber_t          agno,   /* allocation group number */
-       xfs_agblock_t           agbno,  /* allocation group block number */
-       uint                    lock);  /* lock flags for get_buf */
+       xfs_agblock_t           agbno); /* allocation group block number */
 
 /*
  * Check for the cursor referring to the last block at the given level.
@@ -345,7 +343,6 @@ xfs_btree_read_bufl(
        struct xfs_mount        *mp,    /* file system mount point */
        struct xfs_trans        *tp,    /* transaction pointer */
        xfs_fsblock_t           fsbno,  /* file system block number */
-       uint                    lock,   /* lock flags for read_buf */
        struct xfs_buf          **bpp,  /* buffer for fsbno */
        int                     refval, /* ref count value for buffer */
        const struct xfs_buf_ops *ops);
@@ -383,8 +380,7 @@ xfs_btree_init_block(
        xfs_btnum_t     btnum,
        __u16           level,
        __u16           numrecs,
-       __u64           owner,
-       unsigned int    flags);
+       __u64           owner);
 
 void
 xfs_btree_init_block_int(
@@ -469,8 +465,8 @@ uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len);
 unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
 
 /* return codes */
-#define XFS_BTREE_QUERY_RANGE_CONTINUE 0       /* keep iterating */
-#define XFS_BTREE_QUERY_RANGE_ABORT    1       /* stop iterating */
+#define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */
+#define XFS_BTREE_QUERY_RANGE_ABORT    (XFS_ITER_ABORT)    /* stop iterating */
 typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
                union xfs_btree_rec *rec, void *priv);
 
index e2737e2ac2aeb5e31a997ee3ed5f3800bf5ecfa7..d1c77fd0815da89cca4882c7bd4c19efac357745 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
 #include "xfs_bmap.h"
-#include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
 #include "xfs_buf_item.h"
 #include "xfs_log.h"
 
@@ -126,7 +120,7 @@ xfs_da3_blkinfo_verify(
        struct xfs_buf          *bp,
        struct xfs_da3_blkinfo  *hdr3)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_da_blkinfo   *hdr = &hdr3->hdr;
 
        if (!xfs_verify_magic16(bp, hdr->magic))
@@ -148,7 +142,7 @@ static xfs_failaddr_t
 xfs_da3_node_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_da_intnode   *hdr = bp->b_addr;
        struct xfs_da3_icnode_hdr ichdr;
        const struct xfs_dir_ops *ops;
@@ -186,7 +180,7 @@ static void
 xfs_da3_node_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
        xfs_failaddr_t          fa;
index b39053dcb643976fe571a13db58202c0dcdb251f..b1ae572496b699b2c5e84b3f1792c850e32fae0b 100644 (file)
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
 
 /*
  * Shortform directory ops
index 1c6bf2105939f15534c95ce7531fc6715212da31..eb2be2a6a25a92927e963b5f7f5ae7469ebc0417 100644 (file)
@@ -9,8 +9,6 @@
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_trans.h"
index 156ce95c9c4545de6b03cd638463e23f7fa4746e..67840723edbbc68dcde52eeb5747725896525abd 100644 (file)
@@ -5,20 +5,16 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
index b7d6d78f4ce2f3ef263fd54d8523702045dc5c40..a6fb0cc2085eff66357a29411e1ae817da783e65 100644 (file)
@@ -6,22 +6,19 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
 #include "xfs_buf_item.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
 #include "xfs_log.h"
 
 /*
@@ -50,7 +47,7 @@ static xfs_failaddr_t
 xfs_dir3_block_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_verify_magic(bp, hdr3->magic))
@@ -71,7 +68,7 @@ static void
 xfs_dir3_block_read_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        xfs_failaddr_t          fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -88,7 +85,7 @@ static void
 xfs_dir3_block_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
        xfs_failaddr_t          fa;
index b7b9ce002cb97838d2413ad579d499c582fda3d6..2c79be4c3153855d5dc3425bc590c135fe7e4786 100644 (file)
@@ -6,19 +6,16 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
 #include "xfs_error.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
-#include "xfs_cksum.h"
 #include "xfs_log.h"
 
 static xfs_failaddr_t xfs_dir2_data_freefind_verify(
@@ -50,14 +47,13 @@ __xfs_dir3_data_check(
        int                     i;              /* leaf index */
        int                     lastfree;       /* last entry was unused */
        xfs_dir2_leaf_entry_t   *lep=NULL;      /* block leaf entries */
-       xfs_mount_t             *mp;            /* filesystem mount point */
+       struct xfs_mount        *mp = bp->b_mount;
        char                    *p;             /* current data position */
        int                     stale;          /* count of stale leaves */
        struct xfs_name         name;
        const struct xfs_dir_ops *ops;
        struct xfs_da_geometry  *geo;
 
-       mp = bp->b_target->bt_mount;
        geo = mp->m_dir_geo;
 
        /*
@@ -249,7 +245,7 @@ static xfs_failaddr_t
 xfs_dir3_data_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
 
        if (!xfs_verify_magic(bp, hdr3->magic))
@@ -298,7 +294,7 @@ static void
 xfs_dir3_data_read_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        xfs_failaddr_t          fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -315,7 +311,7 @@ static void
 xfs_dir3_data_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
        xfs_failaddr_t          fa;
index 9c2a0a13ed61289b43cb2fbb6b86a29452c0323b..a53e4585a2f3ab6a88a5782a82820a62b971bffb 100644 (file)
@@ -6,12 +6,11 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
 #include "xfs_dir2.h"
@@ -20,8 +19,6 @@
 #include "xfs_trace.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
-#include "xfs_cksum.h"
-#include "xfs_log.h"
 
 /*
  * Local function declarations.
@@ -144,7 +141,7 @@ static xfs_failaddr_t
 xfs_dir3_leaf_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_dir2_leaf    *leaf = bp->b_addr;
        xfs_failaddr_t          fa;
 
@@ -159,7 +156,7 @@ static void
 xfs_dir3_leaf_read_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        xfs_failaddr_t          fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -176,7 +173,7 @@ static void
 xfs_dir3_leaf_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
        xfs_failaddr_t          fa;
index 16731d2d684be4097277695d9d87937f4b5b0afe..afcc6642690a8d85aebab8849321b01c50016b98 100644 (file)
@@ -6,12 +6,11 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
 #include "xfs_dir2.h"
@@ -20,7 +19,6 @@
 #include "xfs_trace.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
-#include "xfs_cksum.h"
 #include "xfs_log.h"
 
 /*
@@ -84,7 +82,7 @@ static xfs_failaddr_t
 xfs_dir3_free_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_dir2_free_hdr *hdr = bp->b_addr;
 
        if (!xfs_verify_magic(bp, hdr->magic))
@@ -110,7 +108,7 @@ static void
 xfs_dir3_free_read_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        xfs_failaddr_t          fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -127,7 +125,7 @@ static void
 xfs_dir3_free_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_log_item;
        struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
        xfs_failaddr_t          fa;
index 585dfdb7b6b688f13d43fed7dd5878afedfbc8a9..033589257f54f8cb08b513f863faffa3ed3b134b 100644 (file)
@@ -5,16 +5,13 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_error.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_trace.h"
index 88fa11071f9f1cb76373668f1e5645dc8ab6201e..e8bd688a4073d909df133b806ce7d79c27e00090 100644 (file)
@@ -16,8 +16,6 @@
 #include "xfs_trans.h"
 #include "xfs_qm.h"
 #include "xfs_error.h"
-#include "xfs_cksum.h"
-#include "xfs_trace.h"
 
 int
 xfs_calc_dquots_per_chunk(
@@ -224,7 +222,7 @@ static xfs_failaddr_t
 xfs_dquot_buf_verify_struct(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
 
        return xfs_dquot_buf_verify(mp, bp, false);
 }
@@ -233,7 +231,7 @@ static void
 xfs_dquot_buf_read_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
 
        if (!xfs_dquot_buf_verify_crc(mp, bp, false))
                return;
@@ -250,7 +248,7 @@ static void
 xfs_dquot_buf_readahead_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
 
        if (!xfs_dquot_buf_verify_crc(mp, bp, true) ||
            xfs_dquot_buf_verify(mp, bp, true) != NULL) {
@@ -268,7 +266,7 @@ static void
 xfs_dquot_buf_write_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
 
        xfs_dquot_buf_verify(mp, bp, false);
 }
index 9bb3c48843ec216591e9a00994d56fda5d45fb7f..c968b60cee15bf14d8d6d590db9e8e83802ce117 100644 (file)
@@ -1071,7 +1071,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
 #define        XFS_INO_MASK(k)                 (uint32_t)((1ULL << (k)) - 1)
 #define        XFS_INO_OFFSET_BITS(mp)         (mp)->m_sb.sb_inopblog
 #define        XFS_INO_AGBNO_BITS(mp)          (mp)->m_sb.sb_agblklog
-#define        XFS_INO_AGINO_BITS(mp)          (mp)->m_agino_log
+#define        XFS_INO_AGINO_BITS(mp)          ((mp)->m_ino_geo.agino_log)
 #define        XFS_INO_AGNO_BITS(mp)           (mp)->m_agno_log
 #define        XFS_INO_BITS(mp)                \
        XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp)
index e7382c780ed7ec30ad13439a56752dd2097a27e7..52d03a3a02a4e4a8b21bae62d0871b494d642e2d 100644 (file)
@@ -97,7 +97,7 @@ struct getbmapx {
  * For use by backup and restore programs to set the XFS on-disk inode
  * fields di_dmevmask and di_dmstate.  These must be set to exactly and
  * only values previously obtained via xfs_bulkstat!  (Specifically the
- * xfs_bstat_t fields bs_dmevmask and bs_dmstate.)
+ * struct xfs_bstat fields bs_dmevmask and bs_dmstate.)
  */
 #ifndef HAVE_FSDMIDATA
 struct fsdmidata {
@@ -328,7 +328,7 @@ typedef struct xfs_bstime {
        __s32           tv_nsec;        /* and nanoseconds      */
 } xfs_bstime_t;
 
-typedef struct xfs_bstat {
+struct xfs_bstat {
        __u64           bs_ino;         /* inode number                 */
        __u16           bs_mode;        /* type and mode                */
        __u16           bs_nlink;       /* number of links              */
@@ -356,7 +356,53 @@ typedef struct xfs_bstat {
        __u32           bs_dmevmask;    /* DMIG event mask              */
        __u16           bs_dmstate;     /* DMIG state info              */
        __u16           bs_aextents;    /* attribute number of extents  */
-} xfs_bstat_t;
+};
+
+/* New bulkstat structure that reports v5 features and fixes padding issues */
+struct xfs_bulkstat {
+       uint64_t        bs_ino;         /* inode number                 */
+       uint64_t        bs_size;        /* file size                    */
+
+       uint64_t        bs_blocks;      /* number of blocks             */
+       uint64_t        bs_xflags;      /* extended flags               */
+
+       uint64_t        bs_atime;       /* access time, seconds         */
+       uint64_t        bs_mtime;       /* modify time, seconds         */
+
+       uint64_t        bs_ctime;       /* inode change time, seconds   */
+       uint64_t        bs_btime;       /* creation time, seconds       */
+
+       uint32_t        bs_gen;         /* generation count             */
+       uint32_t        bs_uid;         /* user id                      */
+       uint32_t        bs_gid;         /* group id                     */
+       uint32_t        bs_projectid;   /* project id                   */
+
+       uint32_t        bs_atime_nsec;  /* access time, nanoseconds     */
+       uint32_t        bs_mtime_nsec;  /* modify time, nanoseconds     */
+       uint32_t        bs_ctime_nsec;  /* inode change time, nanoseconds */
+       uint32_t        bs_btime_nsec;  /* creation time, nanoseconds   */
+
+       uint32_t        bs_blksize;     /* block size                   */
+       uint32_t        bs_rdev;        /* device value                 */
+       uint32_t        bs_cowextsize_blks; /* cow extent size hint, blocks */
+       uint32_t        bs_extsize_blks; /* extent size hint, blocks    */
+
+       uint32_t        bs_nlink;       /* number of links              */
+       uint32_t        bs_extents;     /* number of extents            */
+       uint32_t        bs_aextents;    /* attribute number of extents  */
+       uint16_t        bs_version;     /* structure version (see XFS_BULKSTAT_VERSION_*) */
+       uint16_t        bs_forkoff;     /* inode fork offset in bytes   */
+
+       uint16_t        bs_sick;        /* sick inode metadata, XFS_BS_SICK_* flags */
+       uint16_t        bs_checked;     /* checked inode metadata       */
+       uint16_t        bs_mode;        /* type and mode                */
+       uint16_t        bs_pad2;        /* padding, zeroed              */
+
+       uint64_t        bs_pad[7];      /* padding, zeroed              */
+};
+
+#define XFS_BULKSTAT_VERSION_V1        (1)
+#define XFS_BULKSTAT_VERSION_V5        (5)
 
 /* bs_sick flags */
 #define XFS_BS_SICK_INODE      (1 << 0)  /* inode core */
@@ -374,7 +420,7 @@ typedef struct xfs_bstat {
  * to retain compatibility with "old" filesystems).
  */
 static inline uint32_t
-bstat_get_projid(struct xfs_bstat *bs)
+bstat_get_projid(const struct xfs_bstat *bs)
 {
        return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
 }
@@ -382,23 +428,79 @@ bstat_get_projid(struct xfs_bstat *bs)
 /*
  * The user-level BulkStat Request interface structure.
  */
-typedef struct xfs_fsop_bulkreq {
+struct xfs_fsop_bulkreq {
        __u64           __user *lastip; /* last inode # pointer         */
        __s32           icount;         /* count of entries in buffer   */
        void            __user *ubuffer;/* user buffer for inode desc.  */
        __s32           __user *ocount; /* output count pointer         */
-} xfs_fsop_bulkreq_t;
-
+};
 
 /*
  * Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS).
  */
-typedef struct xfs_inogrp {
+struct xfs_inogrp {
        __u64           xi_startino;    /* starting inode number        */
        __s32           xi_alloccount;  /* # bits set in allocmask      */
        __u64           xi_allocmask;   /* mask of allocated inodes     */
-} xfs_inogrp_t;
+};
 
+/* New inumbers structure that reports v5 features and fixes padding issues */
+struct xfs_inumbers {
+       uint64_t        xi_startino;    /* starting inode number        */
+       uint64_t        xi_allocmask;   /* mask of allocated inodes     */
+       uint8_t         xi_alloccount;  /* # bits set in allocmask      */
+       uint8_t         xi_version;     /* structure version (see XFS_INUMBERS_VERSION_*) */
+       uint8_t         xi_padding[6];  /* padding, zero                */
+};
+
+#define XFS_INUMBERS_VERSION_V1        (1)
+#define XFS_INUMBERS_VERSION_V5        (5)
+
+/* Header for bulk inode requests.  Field tags: I = input, O = output. */
+struct xfs_bulk_ireq {
+       uint64_t        ino;            /* I/O: start with this inode   */
+       uint32_t        flags;          /* I/O: XFS_BULK_IREQ_* flags    */
+       uint32_t        icount;         /* I: count of entries in buffer */
+       uint32_t        ocount;         /* O: count of entries filled out */
+       uint32_t        agno;           /* I: see XFS_BULK_IREQ_AGNO     */
+       uint64_t        reserved[5];    /* must be zero                 */
+};
+
+/*
+ * Only return results from the specified @agno.  If @ino is zero, start
+ * with the first inode of @agno.
+ */
+#define XFS_BULK_IREQ_AGNO     (1 << 0)
+
+/*
+ * Return bulkstat information for a single inode, where @ino value is a
+ * special value, not a literal inode number.  See the XFS_BULK_IREQ_SPECIAL_*
+ * values below.  Not compatible with XFS_BULK_IREQ_AGNO.
+ */
+#define XFS_BULK_IREQ_SPECIAL  (1 << 1)
+
+#define XFS_BULK_IREQ_FLAGS_ALL        (XFS_BULK_IREQ_AGNO | \
+                                XFS_BULK_IREQ_SPECIAL)
+
+/* Operate on the root directory inode. */
+#define XFS_BULK_IREQ_SPECIAL_ROOT     (1)
+
+/*
+ * ioctl structures for v5 bulkstat and inumbers requests: header + records
+ */
+struct xfs_bulkstat_req {
+       struct xfs_bulk_ireq    hdr;            /* request header */
+       struct xfs_bulkstat     bulkstat[];     /* flexible array of records */
+};
+#define XFS_BULKSTAT_REQ_SIZE(nr)      (sizeof(struct xfs_bulkstat_req) + \
+                                        (nr) * sizeof(struct xfs_bulkstat)) /* bytes for header + nr records */
+
+struct xfs_inumbers_req {
+       struct xfs_bulk_ireq    hdr;            /* request header */
+       struct xfs_inumbers     inumbers[];     /* flexible array of records */
+};
+#define XFS_INUMBERS_REQ_SIZE(nr)      (sizeof(struct xfs_inumbers_req) + \
+                                        (nr) * sizeof(struct xfs_inumbers)) /* bytes for header + nr records */
 
 /*
  * Error injection.
@@ -529,7 +631,7 @@ typedef struct xfs_swapext
        xfs_off_t       sx_offset;      /* offset into file */
        xfs_off_t       sx_length;      /* leng from offset */
        char            sx_pad[16];     /* pad space, unused */
-       xfs_bstat_t     sx_stat;        /* stat of target b4 copy */
+       struct xfs_bstat sx_stat;       /* stat of target b4 copy */
 } xfs_swapext_t;
 
 /*
@@ -701,6 +803,8 @@ struct xfs_scrub_metadata {
 #define XFS_IOC_FSGEOMETRY_V4       _IOR ('X', 124, struct xfs_fsop_geom_v4)
 #define XFS_IOC_GOINGDOWN           _IOR ('X', 125, uint32_t)
 #define XFS_IOC_FSGEOMETRY          _IOR ('X', 126, struct xfs_fsop_geom)
+#define XFS_IOC_BULKSTAT            _IOR ('X', 127, struct xfs_bulkstat_req)
+#define XFS_IOC_INUMBERS            _IOR ('X', 128, struct xfs_inumbers_req)
 /*     XFS_IOC_GETFSUUID ---------- deprecated 140      */
 
 
index 49ddfeac19f25bfac8b1b4ecb1d76d645fb1e1f5..272005ac8c882db9f84f68abb525572d04ef5ddd 100644 (file)
@@ -185,6 +185,6 @@ xfs_inode_is_healthy(struct xfs_inode *ip)
 
 void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo);
 void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
-void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bstat *bs);
+void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs);
 
 #endif /* __XFS_HEALTH_H__ */
index fe9898875097f5cd8506f9664f636a393cce2e2e..04377ab75863033cbcb31312a6e6f5356498b3a3 100644 (file)
 #include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_bmap.h"
-#include "xfs_cksum.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_icreate_item.h"
 #include "xfs_log.h"
 #include "xfs_rmap.h"
 
-
-/*
- * Allocation group level functions.
- */
-int
-xfs_ialloc_cluster_alignment(
-       struct xfs_mount        *mp)
-{
-       if (xfs_sb_version_hasalign(&mp->m_sb) &&
-           mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
-               return mp->m_sb.sb_inoalignmt;
-       return 1;
-}
-
 /*
  * Lookup a record by ino in the btree given by cur.
  */
@@ -299,7 +282,7 @@ xfs_ialloc_inode_init(
         * sizes, manipulate the inodes in buffers  which are multiples of the
         * blocks size.
         */
-       nbufs = length / mp->m_blocks_per_cluster;
+       nbufs = length / M_IGEO(mp)->blocks_per_cluster;
 
        /*
         * Figure out what version number to use in the inodes we create.  If
@@ -343,9 +326,10 @@ xfs_ialloc_inode_init(
                 * Get the block.
                 */
                d = XFS_AGB_TO_DADDR(mp, agno, agbno +
-                               (j * mp->m_blocks_per_cluster));
+                               (j * M_IGEO(mp)->blocks_per_cluster));
                fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
-                                        mp->m_bsize * mp->m_blocks_per_cluster,
+                                        mp->m_bsize *
+                                        M_IGEO(mp)->blocks_per_cluster,
                                         XBF_UNMAPPED);
                if (!fbuf)
                        return -ENOMEM;
@@ -353,7 +337,7 @@ xfs_ialloc_inode_init(
                /* Initialize the inode buffers and log them appropriately. */
                fbuf->b_ops = &xfs_inode_buf_ops;
                xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
-               for (i = 0; i < mp->m_inodes_per_cluster; i++) {
+               for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
                        int     ioffset = i << mp->m_sb.sb_inodelog;
                        uint    isize = xfs_dinode_size(version);
 
@@ -616,24 +600,26 @@ error:
  * Allocate new inodes in the allocation group specified by agbp.
  * Return 0 for success, else error code.
  */
-STATIC int                             /* error code or 0 */
+STATIC int
 xfs_ialloc_ag_alloc(
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_buf_t       *agbp,          /* alloc group buffer */
-       int             *alloc)
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       int                     *alloc)
 {
-       xfs_agi_t       *agi;           /* allocation group header */
-       xfs_alloc_arg_t args;           /* allocation argument structure */
-       xfs_agnumber_t  agno;
-       int             error;
-       xfs_agino_t     newino;         /* new first inode's number */
-       xfs_agino_t     newlen;         /* new number of inodes */
-       int             isaligned = 0;  /* inode allocation at stripe unit */
-                                       /* boundary */
-       uint16_t        allocmask = (uint16_t) -1; /* init. to full chunk */
+       struct xfs_agi          *agi;
+       struct xfs_alloc_arg    args;
+       xfs_agnumber_t          agno;
+       int                     error;
+       xfs_agino_t             newino;         /* new first inode's number */
+       xfs_agino_t             newlen;         /* new number of inodes */
+       int                     isaligned = 0;  /* inode allocation at stripe */
+                                               /* unit boundary */
+       /* init. to full chunk */
+       uint16_t                allocmask = (uint16_t) -1;
        struct xfs_inobt_rec_incore rec;
-       struct xfs_perag *pag;
-       int             do_sparse = 0;
+       struct xfs_perag        *pag;
+       struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp);
+       int                     do_sparse = 0;
 
        memset(&args, 0, sizeof(args));
        args.tp = tp;
@@ -644,7 +630,7 @@ xfs_ialloc_ag_alloc(
 #ifdef DEBUG
        /* randomly do sparse inode allocations */
        if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) &&
-           args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks)
+           igeo->ialloc_min_blks < igeo->ialloc_blks)
                do_sparse = prandom_u32() & 1;
 #endif
 
@@ -652,12 +638,12 @@ xfs_ialloc_ag_alloc(
         * Locking will ensure that we don't have two callers in here
         * at one time.
         */
-       newlen = args.mp->m_ialloc_inos;
-       if (args.mp->m_maxicount &&
+       newlen = igeo->ialloc_inos;
+       if (igeo->maxicount &&
            percpu_counter_read_positive(&args.mp->m_icount) + newlen >
-                                                       args.mp->m_maxicount)
+                                                       igeo->maxicount)
                return -ENOSPC;
-       args.minlen = args.maxlen = args.mp->m_ialloc_blks;
+       args.minlen = args.maxlen = igeo->ialloc_blks;
        /*
         * First try to allocate inodes contiguous with the last-allocated
         * chunk of inodes.  If the filesystem is striped, this will fill
@@ -667,7 +653,7 @@ xfs_ialloc_ag_alloc(
        newino = be32_to_cpu(agi->agi_newino);
        agno = be32_to_cpu(agi->agi_seqno);
        args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
-                    args.mp->m_ialloc_blks;
+                    igeo->ialloc_blks;
        if (do_sparse)
                goto sparse_alloc;
        if (likely(newino != NULLAGINO &&
@@ -690,10 +676,10 @@ xfs_ialloc_ag_alloc(
                 * but not to use them in the actual exact allocation.
                 */
                args.alignment = 1;
-               args.minalignslop = args.mp->m_cluster_align - 1;
+               args.minalignslop = igeo->cluster_align - 1;
 
                /* Allow space for the inode btree to split. */
-               args.minleft = args.mp->m_in_maxlevels - 1;
+               args.minleft = igeo->inobt_maxlevels - 1;
                if ((error = xfs_alloc_vextent(&args)))
                        return error;
 
@@ -720,12 +706,12 @@ xfs_ialloc_ag_alloc(
                 * pieces, so don't need alignment anyway.
                 */
                isaligned = 0;
-               if (args.mp->m_sinoalign) {
+               if (igeo->ialloc_align) {
                        ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
                        args.alignment = args.mp->m_dalign;
                        isaligned = 1;
                } else
-                       args.alignment = args.mp->m_cluster_align;
+                       args.alignment = igeo->cluster_align;
                /*
                 * Need to figure out where to allocate the inode blocks.
                 * Ideally they should be spaced out through the a.g.
@@ -741,7 +727,7 @@ xfs_ialloc_ag_alloc(
                /*
                 * Allow space for the inode btree to split.
                 */
-               args.minleft = args.mp->m_in_maxlevels - 1;
+               args.minleft = igeo->inobt_maxlevels - 1;
                if ((error = xfs_alloc_vextent(&args)))
                        return error;
        }
@@ -754,7 +740,7 @@ xfs_ialloc_ag_alloc(
                args.type = XFS_ALLOCTYPE_NEAR_BNO;
                args.agbno = be32_to_cpu(agi->agi_root);
                args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
-               args.alignment = args.mp->m_cluster_align;
+               args.alignment = igeo->cluster_align;
                if ((error = xfs_alloc_vextent(&args)))
                        return error;
        }
@@ -764,7 +750,7 @@ xfs_ialloc_ag_alloc(
         * the sparse allocation length is smaller than a full chunk.
         */
        if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
-           args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks &&
+           igeo->ialloc_min_blks < igeo->ialloc_blks &&
            args.fsbno == NULLFSBLOCK) {
 sparse_alloc:
                args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -773,7 +759,7 @@ sparse_alloc:
                args.alignment = args.mp->m_sb.sb_spino_align;
                args.prod = 1;
 
-               args.minlen = args.mp->m_ialloc_min_blks;
+               args.minlen = igeo->ialloc_min_blks;
                args.maxlen = args.minlen;
 
                /*
@@ -789,7 +775,7 @@ sparse_alloc:
                args.min_agbno = args.mp->m_sb.sb_inoalignmt;
                args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
                                            args.mp->m_sb.sb_inoalignmt) -
-                                args.mp->m_ialloc_blks;
+                                igeo->ialloc_blks;
 
                error = xfs_alloc_vextent(&args);
                if (error)
@@ -1006,7 +992,7 @@ xfs_ialloc_ag_select(
                 * space needed for alignment of inode chunks when checking the
                 * longest contiguous free space in the AG - this prevents us
                 * from getting ENOSPC because we have free space larger than
-                * m_ialloc_blks but alignment constraints prevent us from using
+                * ialloc_blks but alignment constraints prevent us from using
                 * it.
                 *
                 * If we can't find an AG with space for full alignment slack to
@@ -1015,9 +1001,9 @@ xfs_ialloc_ag_select(
                 * if we fail allocation due to alignment issues then it is most
                 * likely a real ENOSPC condition.
                 */
-               ineed = mp->m_ialloc_min_blks;
+               ineed = M_IGEO(mp)->ialloc_min_blks;
                if (flags && ineed > 1)
-                       ineed += mp->m_cluster_align;
+                       ineed += M_IGEO(mp)->cluster_align;
                longest = pag->pagf_longest;
                if (!longest)
                        longest = pag->pagf_flcount > 0;
@@ -1703,6 +1689,7 @@ xfs_dialloc(
        int                     noroom = 0;
        xfs_agnumber_t          start_agno;
        struct xfs_perag        *pag;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
        int                     okalloc = 1;
 
        if (*IO_agbp) {
@@ -1733,9 +1720,9 @@ xfs_dialloc(
         * Read rough value of mp->m_icount by percpu_counter_read_positive,
         * which will sacrifice the preciseness but improve the performance.
         */
-       if (mp->m_maxicount &&
-           percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos
-                                                       > mp->m_maxicount) {
+       if (igeo->maxicount &&
+           percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
+                                                       > igeo->maxicount) {
                noroom = 1;
                okalloc = 0;
        }
@@ -1852,7 +1839,8 @@ xfs_difree_inode_chunk(
        if (!xfs_inobt_issparse(rec->ir_holemask)) {
                /* not sparse, calculate extent info directly */
                xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
-                                 mp->m_ialloc_blks, &XFS_RMAP_OINFO_INODES);
+                                 M_IGEO(mp)->ialloc_blks,
+                                 &XFS_RMAP_OINFO_INODES);
                return;
        }
 
@@ -2261,7 +2249,7 @@ xfs_imap_lookup(
 
        /* check that the returned record contains the required inode */
        if (rec.ir_startino > agino ||
-           rec.ir_startino + mp->m_ialloc_inos <= agino)
+           rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino)
                return -EINVAL;
 
        /* for untrusted inodes check it is allocated first */
@@ -2352,7 +2340,7 @@ xfs_imap(
         * If the inode cluster size is the same as the blocksize or
         * smaller we get to the buffer by simple arithmetics.
         */
-       if (mp->m_blocks_per_cluster == 1) {
+       if (M_IGEO(mp)->blocks_per_cluster == 1) {
                offset = XFS_INO_TO_OFFSET(mp, ino);
                ASSERT(offset < mp->m_sb.sb_inopblock);
 
@@ -2368,8 +2356,8 @@ xfs_imap(
         * find the location. Otherwise we have to do a btree
         * lookup to find the location.
         */
-       if (mp->m_inoalign_mask) {
-               offset_agbno = agbno & mp->m_inoalign_mask;
+       if (M_IGEO(mp)->inoalign_mask) {
+               offset_agbno = agbno & M_IGEO(mp)->inoalign_mask;
                chunk_agbno = agbno - offset_agbno;
        } else {
                error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
@@ -2381,13 +2369,13 @@ xfs_imap(
 out_map:
        ASSERT(agbno >= chunk_agbno);
        cluster_agbno = chunk_agbno +
-               ((offset_agbno / mp->m_blocks_per_cluster) *
-                mp->m_blocks_per_cluster);
+               ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) *
+                M_IGEO(mp)->blocks_per_cluster);
        offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
                XFS_INO_TO_OFFSET(mp, ino);
 
        imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
-       imap->im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
+       imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
        imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
 
        /*
@@ -2408,20 +2396,6 @@ out_map:
        return 0;
 }
 
-/*
- * Compute and fill in value of m_in_maxlevels.
- */
-void
-xfs_ialloc_compute_maxlevels(
-       xfs_mount_t     *mp)            /* file system mount structure */
-{
-       uint            inodes;
-
-       inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
-       mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp->m_inobt_mnr,
-                                                        inodes);
-}
-
 /*
  * Log specified fields for the ag hdr (inode section). The growth of the agi
  * structure over time requires that we interpret the buffer as two logical
@@ -2493,7 +2467,7 @@ static xfs_failaddr_t
 xfs_agi_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
        struct xfs_agi  *agi = XFS_BUF_TO_AGI(bp);
        int             i;
 
@@ -2545,7 +2519,7 @@ static void
 xfs_agi_read_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
        xfs_failaddr_t  fa;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -2562,7 +2536,7 @@ static void
 xfs_agi_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_log_item;
        xfs_failaddr_t          fa;
 
@@ -2768,3 +2742,110 @@ xfs_ialloc_count_inodes(
        *freecount = ci.freecount;
        return 0;
 }
+
+/*
+ * Initialize inode-related geometry information in M_IGEO(mp).
+ *
+ * Compute the inode btree record limits and max levels, and set maxicount.
+ *
+ * Set the inode cluster size.  This may still be overridden by the file
+ * system block size if it is larger than the chosen cluster size.
+ *
+ * For v5 filesystems, scale the cluster size with the inode size to keep a
+ * constant ratio of inodes per cluster buffer, but only if mkfs has set the
+ * inode alignment value appropriately for larger cluster sizes.
+ *
+ * Then compute the inode cluster alignment information.
+ */
+void
+xfs_ialloc_setup_geometry(
+       struct xfs_mount        *mp)
+{
+       struct xfs_sb           *sbp = &mp->m_sb;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
+       uint64_t                icount;
+       uint                    inodes;
+
+       /* Compute inode btree geometry. */
+       igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
+       igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
+       igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
+       igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2; /* min is half of max */
+       igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2;
+
+       igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
+                       sbp->sb_inopblock); /* larger of a chunk or a block */
+       igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog;
+
+       if (sbp->sb_spino_align)
+               igeo->ialloc_min_blks = sbp->sb_spino_align;
+       else
+               igeo->ialloc_min_blks = igeo->ialloc_blks;
+
+       /* Compute and fill in value of m_ino_geo.inobt_maxlevels. */
+       inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
+       igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr,
+                       inodes);
+
+       /* Set the maximum inode count for this filesystem. */
+       if (sbp->sb_imax_pct) {
+               /*
+                * Make sure the maximum inode count is a multiple
+                * of the units we allocate inodes in.
+                */
+               icount = sbp->sb_dblocks * sbp->sb_imax_pct;
+               do_div(icount, 100);
+               do_div(icount, igeo->ialloc_blks); /* whole ialloc_blks units */
+               igeo->maxicount = XFS_FSB_TO_INO(mp,
+                               icount * igeo->ialloc_blks);
+       } else {
+               igeo->maxicount = 0; /* 0 disables the limit checks in callers */
+       }
+
+       /*
+        * Compute the desired size of an inode cluster buffer, which
+        * starts at 8K and (on v5 filesystems) scales up with larger inode
+        * sizes.
+        *
+        * Preserve the desired inode cluster size because the sparse inodes
+        * feature uses that desired size (not the actual size) to compute the
+        * sparse inode alignment.  The mount code validates this value, so we
+        * cannot change the behavior.
+        */
+       igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE;
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               int     new_size = igeo->inode_cluster_size_raw;
+
+               new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
+               if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
+                       igeo->inode_cluster_size_raw = new_size;
+       }
+
+       /* Calculate inode cluster ratios. */
+       if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize)
+               igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp,
+                               igeo->inode_cluster_size_raw);
+       else
+               igeo->blocks_per_cluster = 1;
+       igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster);
+       igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster);
+
+       /* Calculate inode cluster alignment. */
+       if (xfs_sb_version_hasalign(&mp->m_sb) &&
+           mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster)
+               igeo->cluster_align = mp->m_sb.sb_inoalignmt;
+       else
+               igeo->cluster_align = 1;
+       igeo->inoalign_mask = igeo->cluster_align - 1; /* 0 if cluster_align == 1 */
+       igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align);
+
+       /*
+        * If we are using stripe alignment and the stripe unit is a multiple
+        * of the inode alignment, record the stripe unit in ialloc_align.
+        */
+       if (mp->m_dalign && igeo->inoalign_mask &&
+           !(mp->m_dalign & igeo->inoalign_mask))
+               igeo->ialloc_align = mp->m_dalign;
+       else
+               igeo->ialloc_align = 0;
+}
index e936b7cc93893061f11ac7338ad7a9ba8f01ae27..323592d563d520f9f940fc1913d475bb6ce90708 100644 (file)
@@ -23,16 +23,6 @@ struct xfs_icluster {
                                         * sparse chunks */
 };
 
-/* Calculate and return the number of filesystem blocks per inode cluster */
-static inline int
-xfs_icluster_size_fsb(
-       struct xfs_mount        *mp)
-{
-       if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size)
-               return 1;
-       return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog;
-}
-
 /*
  * Make an inode pointer out of the buffer/offset.
  */
@@ -95,13 +85,6 @@ xfs_imap(
        struct xfs_imap *imap,          /* location map structure */
        uint            flags);         /* flags for inode btree lookup */
 
-/*
- * Compute and fill in value of m_in_maxlevels.
- */
-void
-xfs_ialloc_compute_maxlevels(
-       struct xfs_mount *mp);          /* file system mount structure */
-
 /*
  * Log specified fields for the ag hdr (inode section)
  */
@@ -168,5 +151,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
                int *stat);
 
 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
+void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
 
 #endif /* __XFS_IALLOC_H__ */
index bc2dfacd2f4a01c83864361862442ab63c4a73b2..b82992f795aa969024ba9c1b326691ab93662fb2 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
 #include "xfs_mount.h"
-#include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
 #include "xfs_trans.h"
 #include "xfs_rmap.h"
 
@@ -28,7 +26,7 @@ xfs_inobt_get_minrecs(
        struct xfs_btree_cur    *cur,
        int                     level)
 {
-       return cur->bc_mp->m_inobt_mnr[level != 0];
+       return M_IGEO(cur->bc_mp)->inobt_mnr[level != 0];
 }
 
 STATIC struct xfs_btree_cur *
@@ -164,7 +162,7 @@ xfs_inobt_get_maxrecs(
        struct xfs_btree_cur    *cur,
        int                     level)
 {
-       return cur->bc_mp->m_inobt_mxr[level != 0];
+       return M_IGEO(cur->bc_mp)->inobt_mxr[level != 0];
 }
 
 STATIC void
@@ -255,7 +253,7 @@ static xfs_failaddr_t
 xfs_inobt_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        xfs_failaddr_t          fa;
        unsigned int            level;
@@ -281,10 +279,11 @@ xfs_inobt_verify(
 
        /* level verification */
        level = be16_to_cpu(block->bb_level);
-       if (level >= mp->m_in_maxlevels)
+       if (level >= M_IGEO(mp)->inobt_maxlevels)
                return __this_address;
 
-       return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
+       return xfs_btree_sblock_verify(bp,
+                       M_IGEO(mp)->inobt_mxr[level != 0]);
 }
 
 static void
@@ -546,7 +545,7 @@ xfs_inobt_max_size(
        xfs_agblock_t           agblocks = xfs_ag_block_count(mp, agno);
 
        /* Bail out if we're uninitialized, which can happen in mkfs. */
-       if (mp->m_inobt_mxr[0] == 0)
+       if (M_IGEO(mp)->inobt_mxr[0] == 0)
                return 0;
 
        /*
@@ -558,11 +557,41 @@ xfs_inobt_max_size(
            XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno)
                agblocks -= mp->m_sb.sb_logblocks;
 
-       return xfs_btree_calc_size(mp->m_inobt_mnr,
+       return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr,
                                (uint64_t)agblocks * mp->m_sb.sb_inopblock /
                                        XFS_INODES_PER_CHUNK);
 }
 
+/*
+ * Read AGI and create inobt cursor.
+ *
+ * Reads the AGI buffer for @agno into *@agi_bpp and, on success, sets up an
+ * inode btree cursor of type @which over that buffer in *@curpp.  Both
+ * *@agi_bpp and *@curpp must be NULL on entry (asserted).
+ *
+ * Returns 0 on success.  An error from xfs_ialloc_read_agi() is returned
+ * unchanged.  If the cursor cannot be created, the AGI buffer is released,
+ * *@agi_bpp is reset to NULL and -ENOMEM is returned, so *@curpp is only
+ * written on success.
+ */
+int
+xfs_inobt_cur(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_agnumber_t          agno,
+       xfs_btnum_t             which,
+       struct xfs_btree_cur    **curpp,
+       struct xfs_buf          **agi_bpp)
+{
+       struct xfs_btree_cur    *cur;
+       int                     error;
+
+       ASSERT(*agi_bpp == NULL);
+       ASSERT(*curpp == NULL);
+
+       error = xfs_ialloc_read_agi(mp, tp, agno, agi_bpp);
+       if (error)
+               return error;
+
+       cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which);
+       if (!cur) {
+               xfs_trans_brelse(tp, *agi_bpp);
+               *agi_bpp = NULL;
+               return -ENOMEM;
+       }
+       *curpp = cur;
+       return 0;
+}
+
 static int
 xfs_inobt_count_blocks(
        struct xfs_mount        *mp,
@@ -571,15 +600,14 @@ xfs_inobt_count_blocks(
        xfs_btnum_t             btnum,
        xfs_extlen_t            *tree_blocks)
 {
-       struct xfs_buf          *agbp;
-       struct xfs_btree_cur    *cur;
+       struct xfs_buf          *agbp = NULL;
+       struct xfs_btree_cur    *cur = NULL;
        int                     error;
 
-       error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+       error = xfs_inobt_cur(mp, tp, agno, btnum, &cur, &agbp);
        if (error)
                return error;
 
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
        error = xfs_btree_count_blocks(cur, tree_blocks);
        xfs_btree_del_cursor(cur, error);
        xfs_trans_brelse(tp, agbp);
@@ -619,5 +647,5 @@ xfs_iallocbt_calc_size(
        struct xfs_mount        *mp,
        unsigned long long      len)
 {
-       return xfs_btree_calc_size(mp->m_inobt_mnr, len);
+       return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len);
 }
index ebdd0c6b8766228bdd04b6ad854413a15c88ee62..951305ecaae1b951d6435a6ebed037f1f6da053e 100644 (file)
@@ -64,5 +64,8 @@ int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp,
                xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
 extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp,
                unsigned long long len);
+int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp,
+               xfs_agnumber_t agno, xfs_btnum_t btnum,
+               struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp);
 
 #endif /* __XFS_IALLOC_BTREE_H__ */
index bc690f2409faab3135fc1cf857263fab99faf2a8..27aa3f2bc4bc4273c96ede2a3cfbc91bc8d7e6bb 100644 (file)
@@ -3,18 +3,14 @@
  * Copyright (c) 2017 Christoph Hellwig.
  */
 
-#include <linux/cache.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
 #include "xfs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_inode.h"
-#include "xfs_inode_fork.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_bmap.h"
 #include "xfs_trace.h"
 
 /*
index e021d5133ccb42d7b51f916180420bb41421aa09..28ab3c5255e1875727e74655bc6134ca7ba5a858 100644 (file)
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
-#include "xfs_cksum.h"
 #include "xfs_icache.h"
 #include "xfs_trans.h"
 #include "xfs_ialloc.h"
@@ -33,12 +31,9 @@ xfs_inobp_check(
        xfs_buf_t       *bp)
 {
        int             i;
-       int             j;
        xfs_dinode_t    *dip;
 
-       j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-
-       for (i = 0; i < j; i++) {
+       for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
                dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
                if (!dip->di_next_unlinked)  {
                        xfs_alert(mp,
@@ -80,7 +75,7 @@ xfs_inode_buf_verify(
        struct xfs_buf  *bp,
        bool            readahead)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
        xfs_agnumber_t  agno;
        int             i;
        int             ni;
index f9acf1d436f690952b9e8d5c4f33b3109acb3cba..bf3e0401824658359c5758f032494d5e2d4f6bec 100644 (file)
@@ -3,10 +3,10 @@
  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  */
-#include <linux/log2.h>
 
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bmap.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
-#include "xfs_attr_sf.h"
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_attr_leaf.h"
-#include "xfs_shared.h"
 
 kmem_zone_t *xfs_ifork_zone;
 
index 1b542ec11d5d450bb9e43afab396882853bfc78f..7f55eb3f365367a254ba78bfc191b16baec62f86 100644 (file)
@@ -12,9 +12,7 @@
 #include "xfs_mount.h"
 #include "xfs_da_format.h"
 #include "xfs_trans_space.h"
-#include "xfs_inode.h"
 #include "xfs_da_btree.h"
-#include "xfs_attr_leaf.h"
 #include "xfs_bmap_btree.h"
 
 /*
index 542aa1475b5f969b3faf98f92fa2121461b5d629..51bb9bdb0e847af138baa7ef70b3610364308c69 100644 (file)
@@ -9,7 +9,6 @@
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_btree.h"
@@ -19,7 +18,6 @@
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
 #include "xfs_trans.h"
 #include "xfs_bit.h"
 #include "xfs_refcount.h"
index 5d9de9b217266cfa0d9b9e054047cbb146ad9250..38529dbacd5566900a04ace644777ba879abf206 100644 (file)
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_btree.h"
-#include "xfs_bmap.h"
 #include "xfs_refcount_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
 #include "xfs_trans.h"
 #include "xfs_bit.h"
 #include "xfs_rmap.h"
@@ -203,7 +201,7 @@ STATIC xfs_failaddr_t
 xfs_refcountbt_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_perag        *pag = bp->b_pag;
        xfs_failaddr_t          fa;
index 8ed885507dd82c9e5d156e434373f9e3630b3dfd..e6aeb390b2fb66db53b056b7738ec8bc35835013 100644 (file)
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_btree.h"
 #include "xfs_trans.h"
 #include "xfs_alloc.h"
 #include "xfs_rmap.h"
 #include "xfs_rmap_btree.h"
-#include "xfs_trans_space.h"
 #include "xfs_trace.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
-#include "xfs_extent_busy.h"
-#include "xfs_bmap.h"
 #include "xfs_inode.h"
-#include "xfs_ialloc.h"
 
 /*
  * Lookup the first record less than or equal to [bno, len, owner, offset]
index 5d1f8884c8886eedc81bb54acd90c8445ad62826..fc78efa52c94ed45d7ae25a2aa256bab5e70bb72 100644 (file)
@@ -9,18 +9,14 @@
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_alloc.h"
 #include "xfs_btree.h"
 #include "xfs_rmap.h"
 #include "xfs_rmap_btree.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
 #include "xfs_error.h"
 #include "xfs_extent_busy.h"
 #include "xfs_ag_resv.h"
@@ -292,7 +288,7 @@ static xfs_failaddr_t
 xfs_rmapbt_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_perag        *pag = bp->b_pag;
        xfs_failaddr_t          fa;
index eaaff67e9626869b7d6b296f226bf4e2b109660c..8ea1efc97b41d180a9c79c90b82314c80598655f 100644 (file)
 #include "xfs_mount.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
 #include "xfs_trans.h"
-#include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_buf.h"
-#include "xfs_icache.h"
 #include "xfs_rtalloc.h"
 
 
index e76a3e5d28d77dd9187495319717264895c81644..a08dd8f40346fae595b455e86dc784b90a3c4767 100644 (file)
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
 #include "xfs_ialloc.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
-#include "xfs_cksum.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
 #include "xfs_log.h"
 #include "xfs_rmap_btree.h"
-#include "xfs_bmap.h"
 #include "xfs_refcount_btree.h"
 #include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_health.h"
 
 /*
@@ -686,7 +679,7 @@ xfs_sb_read_verify(
        struct xfs_buf          *bp)
 {
        struct xfs_sb           sb;
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_dsb          *dsb = XFS_BUF_TO_SBP(bp);
        int                     error;
 
@@ -752,7 +745,7 @@ xfs_sb_write_verify(
        struct xfs_buf          *bp)
 {
        struct xfs_sb           sb;
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_log_item;
        int                     error;
 
@@ -800,12 +793,14 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
  *
  * Mount initialization code establishing various mount
  * fields from the superblock associated with the given
- * mount structure
+ * mount structure.
+ *
+ * Inode geometry is calculated in xfs_ialloc_setup_geometry().
  */
 void
 xfs_sb_mount_common(
-       struct xfs_mount *mp,
-       struct xfs_sb   *sbp)
+       struct xfs_mount        *mp,
+       struct xfs_sb           *sbp)
 {
        mp->m_agfrotor = mp->m_agirotor = 0;
        mp->m_maxagi = mp->m_sb.sb_agcount;
@@ -813,7 +808,6 @@ xfs_sb_mount_common(
        mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
        mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
        mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
-       mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
        mp->m_blockmask = sbp->sb_blocksize - 1;
        mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
        mp->m_blockwmask = mp->m_blockwsize - 1;
@@ -823,11 +817,6 @@ xfs_sb_mount_common(
        mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
        mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
 
-       mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
-       mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
-       mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
-       mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
-
        mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
        mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
        mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
@@ -844,14 +833,6 @@ xfs_sb_mount_common(
        mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
 
        mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
-       mp->m_ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
-                                       sbp->sb_inopblock);
-       mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
-
-       if (sbp->sb_spino_align)
-               mp->m_ialloc_min_blks = sbp->sb_spino_align;
-       else
-               mp->m_ialloc_min_blks = mp->m_ialloc_blks;
        mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
        mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp);
 }
@@ -939,7 +920,7 @@ xfs_log_sb(
        struct xfs_trans        *tp)
 {
        struct xfs_mount        *mp = tp->t_mountp;
-       struct xfs_buf          *bp = xfs_trans_getsb(tp, mp, 0);
+       struct xfs_buf          *bp = xfs_trans_getsb(tp, mp);
 
        mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
        mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
@@ -1005,7 +986,7 @@ xfs_update_secondary_sbs(
 
                bp = xfs_buf_get(mp->m_ddev_targp,
                                 XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
-                                XFS_FSS_TO_BB(mp, 1), 0);
+                                XFS_FSS_TO_BB(mp, 1));
                /*
                 * If we get an error reading or writing alternate superblocks,
                 * continue.  xfs_repair chooses the "best" superblock based
@@ -1069,7 +1050,7 @@ xfs_sync_sb_buf(
        if (error)
                return error;
 
-       bp = xfs_trans_getsb(tp, mp, 0);
+       bp = xfs_trans_getsb(tp, mp);
        xfs_log_sb(tp);
        xfs_trans_bhold(tp, bp);
        xfs_trans_set_sync(tp);
index 4e909791aeac48a9ca82c6eb5564ca8e2cc7cadc..e0641b7337b3cf27fb6b0fd4a5953691901e0154 100644 (file)
@@ -65,7 +65,6 @@ void  xfs_log_get_max_trans_res(struct xfs_mount *mp,
 #define XFS_TRANS_DQ_DIRTY     0x10    /* at least one dquot in trx dirty */
 #define XFS_TRANS_RESERVE      0x20    /* OK to use reserved data blocks */
 #define XFS_TRANS_NO_WRITECOUNT 0x40   /* do not elevate SB writecount */
-#define XFS_TRANS_NOFS         0x80    /* pass KM_NOFS to kmem_alloc */
 /*
  * LOWMODE is used by the allocator to activate the lowspace algorithm - when
  * free space is running low the extent allocator may choose to allocate an
@@ -136,4 +135,52 @@ void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
                                 struct xfs_inode *ip, struct xfs_ifork *ifp);
 xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip);
 
+/*
+ * Computed inode geometry for the filesystem.
+ *
+ * Code in this change reaches the structure through M_IGEO(mp); according to
+ * the xfs_sb_mount_common() comment the values are calculated in
+ * xfs_ialloc_setup_geometry().
+ */
+struct xfs_ino_geometry {
+       /* Maximum inode count in this filesystem. */
+       uint64_t        maxicount;
+
+       /* Actual inode cluster buffer size, in bytes. */
+       unsigned int    inode_cluster_size;
+
+       /*
+        * Desired inode cluster buffer size, in bytes.  This value is not
+        * rounded up to at least one filesystem block, which is necessary for
+        * the sole purpose of validating sb_spino_align.  Runtime code must
+        * only ever use inode_cluster_size.
+        */
+       unsigned int    inode_cluster_size_raw;
+
+       /* Inode cluster sizes, adjusted to be at least 1 fsb. */
+       unsigned int    inodes_per_cluster;
+       unsigned int    blocks_per_cluster;
+
+       /*
+        * Inode cluster alignment.  xfs_agino_range() rounds AG block numbers
+        * to a multiple of cluster_align.
+        */
+       unsigned int    cluster_align;
+       unsigned int    cluster_align_inodes;
+       unsigned int    inoalign_mask;  /* mask sb_inoalignmt if used */
+
+       unsigned int    inobt_mxr[2]; /* max inobt btree records; indexed by (level != 0) */
+       unsigned int    inobt_mnr[2]; /* min inobt btree records; indexed by (level != 0) */
+       unsigned int    inobt_maxlevels; /* max inobt btree levels. */
+
+       /* Size of inode allocations under normal operation. */
+       unsigned int    ialloc_inos;
+       unsigned int    ialloc_blks;
+
+       /* Minimum inode blocks for a sparse allocation. */
+       unsigned int    ialloc_min_blks;
+
+       /* stripe unit inode alignment */
+       unsigned int    ialloc_align;
+
+       unsigned int    agino_log;      /* #bits for agino in inum */
+};
+
+/* Keep iterating the data structure. */
+#define XFS_ITER_CONTINUE      (0)
+
+/* Stop iterating the data structure. */
+#define XFS_ITER_ABORT         (1)
+
 #endif /* __XFS_SHARED_H__ */
index a0ccc253c43d0a4c5733c28086c2475c7be5a67b..3b8260ca7d1b80525f6846a1fdded11fd112558b 100644 (file)
 #include "xfs_shared.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
 #include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_symlink.h"
-#include "xfs_cksum.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_log.h"
@@ -90,7 +86,7 @@ static xfs_failaddr_t
 xfs_symlink_verify(
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        struct xfs_dsymlink_hdr *dsl = bp->b_addr;
 
        if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -116,7 +112,7 @@ static void
 xfs_symlink_read_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
        xfs_failaddr_t  fa;
 
        /* no verification of non-crc buffers */
@@ -136,7 +132,7 @@ static void
 xfs_symlink_write_verify(
        struct xfs_buf  *bp)
 {
-       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_mount *mp = bp->b_mount;
        struct xfs_buf_log_item *bip = bp->b_log_item;
        xfs_failaddr_t          fa;
 
index 83f4ee2afc49e8092d0d7b733b02f996596f0541..d12bbd526e7c02ff21eaf07c37a09d1d3b13ad51 100644 (file)
 #include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_ialloc.h"
 #include "xfs_quota.h"
 #include "xfs_trans.h"
 #include "xfs_qm.h"
 #include "xfs_trans_space.h"
-#include "xfs_trace.h"
 
 #define _ALLOC true
 #define _FREE  false
@@ -136,9 +134,10 @@ STATIC uint
 xfs_calc_inobt_res(
        struct xfs_mount        *mp)
 {
-       return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
-                                XFS_FSB_TO_B(mp, 1));
+       return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
+                       XFS_FSB_TO_B(mp, 1)) +
+                               xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
+                       XFS_FSB_TO_B(mp, 1));
 }
 
 /*
@@ -167,7 +166,7 @@ xfs_calc_finobt_res(
  * includes:
  *
  * the allocation btrees: 2 trees * (max depth - 1) * block size
- * the inode chunk: m_ialloc_blks * N
+ * the inode chunk: m_ino_geo.ialloc_blks * N
  *
  * The size N of the inode chunk reservation depends on whether it is for
  * allocation or free and which type of create transaction is in use. An inode
@@ -193,7 +192,7 @@ xfs_calc_inode_chunk_res(
                size = XFS_FSB_TO_B(mp, 1);
        }
 
-       res += xfs_calc_buf_res(mp->m_ialloc_blks, size);
+       res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
        return res;
 }
 
@@ -307,7 +306,7 @@ xfs_calc_iunlink_remove_reservation(
        struct xfs_mount        *mp)
 {
        return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-              2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+              2 * M_IGEO(mp)->inode_cluster_size;
 }
 
 /*
@@ -345,7 +344,7 @@ STATIC uint
 xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
 {
        return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-               max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+                       M_IGEO(mp)->inode_cluster_size;
 }
 
 /*
index a62fb950bef18acfe820098082a19172feeadef5..88221c7a04ccfedd9a78aff832dbdef3ffb29673 100644 (file)
@@ -56,9 +56,9 @@
 #define        XFS_DIRREMOVE_SPACE_RES(mp)     \
        XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
+/*
+ * Blocks reserved for an inode chunk allocation: the chunk itself
+ * (ialloc_blks) plus (inobt_maxlevels - 1) blocks per inode btree, counting
+ * two btrees when the finobt feature is enabled and one otherwise.  The
+ * conditional must be parenthesized: ?: binds more loosely than *, so without
+ * the parentheses the finobt case would reserve a constant 2 blocks.
+ */
 #define        XFS_IALLOC_SPACE_RES(mp)        \
-       ((mp)->m_ialloc_blks + \
-        (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \
-         ((mp)->m_in_maxlevels - 1)))
+       (M_IGEO(mp)->ialloc_blks + \
+        ((xfs_sb_version_hasfinobt(&(mp)->m_sb) ? 2 : 1) * \
+         (M_IGEO(mp)->inobt_maxlevels - 1)))
 
 /*
  * Space reservation values for various transactions.
@@ -94,7 +94,8 @@
 #define        XFS_SYMLINK_SPACE_RES(mp,nl,b)  \
        (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
 #define XFS_IFREE_SPACE_RES(mp)                \
-       (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0)
+       (xfs_sb_version_hasfinobt(&mp->m_sb) ? \
+                       M_IGEO(mp)->inobt_maxlevels : 0)
 
 
 #endif /* __XFS_TRANS_SPACE_H__ */
index d51acc95bc005a61f92e1a594761793968f26eb6..4f595546a639b7c784a670fe31a7cd6870e8d3e2 100644 (file)
@@ -7,19 +7,10 @@
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_format.h"
-#include "xfs_log_format.h"
 #include "xfs_shared.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_rmap.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
 
 /* Find the size of the AG, in blocks. */
 xfs_agblock_t
@@ -87,14 +78,14 @@ xfs_agino_range(
         * Calculate the first inode, which will be in the first
         * cluster-aligned block after the AGFL.
         */
-       bno = round_up(XFS_AGFL_BLOCK(mp) + 1, mp->m_cluster_align);
+       bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align);
        *first = XFS_AGB_TO_AGINO(mp, bno);
 
        /*
         * Calculate the last inode, which will be at the end of the
         * last (aligned) cluster that can be allocated in the AG.
         */
-       bno = round_down(eoag, mp->m_cluster_align);
+       bno = round_down(eoag, M_IGEO(mp)->cluster_align);
        *last = XFS_AGB_TO_AGINO(mp, bno) - 1;
 }
 
index adaeabdefdd33ad7ab35e55a64acd69a5a62e6c4..16b09b94144187c81c38e93c8892d697fd7dc6c2 100644 (file)
@@ -9,20 +9,13 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_inode.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_rmap.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
-#include "scrub/trace.h"
 
 /* Superblock */
 
@@ -646,7 +639,7 @@ xchk_agfl_block(
        xchk_agfl_block_xref(sc, agbno);
 
        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-               return XFS_BTREE_QUERY_RANGE_ABORT;
+               return XFS_ITER_ABORT;
 
        return 0;
 }
@@ -737,7 +730,7 @@ xchk_agfl(
        /* Check the blocks in the AGFL. */
        error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
                        sc->sa.agfl_bp, xchk_agfl_block, &sai);
-       if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+       if (error == XFS_ITER_ABORT) {
                error = 0;
                goto out_free;
        }
index 64e31f87d4907ada7d775ef3e3d6d729bdceeffb..7a1a38b636a91b20a7745ca42264cb68c88b2079 100644 (file)
@@ -9,22 +9,17 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_inode.h"
 #include "xfs_alloc.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_rmap.h"
 #include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
 #include "xfs_refcount_btree.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
index 44883e9112ad06b9db9bdee40a4cdd96ec8b31af..a43d1813c4ffe006a125bc2221809c5b8fbfed8b 100644 (file)
@@ -9,19 +9,12 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
 #include "xfs_alloc.h"
 #include "xfs_rmap.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/btree.h"
-#include "scrub/trace.h"
 
 /*
  * Set us up to scrub free space btrees.
index dce74ec570389a21204e40ddd14d4e1f619bebf9..1afc58bf71dd81e7a9bfc00d062ce217fdfc9abd 100644 (file)
@@ -9,26 +9,62 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
 #include "xfs_inode.h"
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
-#include "xfs_dir2.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/dabtree.h"
-#include "scrub/trace.h"
+#include "scrub/attr.h"
 
-#include <linux/posix_acl_xattr.h>
-#include <linux/xattr.h>
+/*
+ * Allocate enough memory to hold an attr value and attr block bitmaps,
+ * reallocating the buffer if necessary.  Buffer contents are not preserved
+ * across a reallocation.
+ *
+ * @value_size is the minimum number of attr value bytes the buffer must be
+ * able to hold; @flags is passed through to kmem_alloc_large().  If sc->buf
+ * already points to a buffer whose recorded size is large enough, it is kept
+ * untouched.  Otherwise any old buffer is freed and a new one is allocated.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails, in which case
+ * sc->buf is left NULL.
+ */
+int
+xchk_setup_xattr_buf(
+       struct xfs_scrub        *sc,
+       size_t                  value_size,
+       xfs_km_flags_t          flags)
+{
+       size_t                  sz;
+       struct xchk_xattr_buf   *ab = sc->buf;
+
+       /*
+        * We need enough space to read an xattr value from the file or enough
+        * space to hold three copies of the xattr free space bitmap.  We don't
+        * need the buffer space for both purposes at the same time.
+        */
+       sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+       sz = max_t(size_t, sz, value_size);
+
+       /*
+        * If there's already a buffer, figure out if we need to reallocate it
+        * to accommodate a larger size.
+        */
+       if (ab) {
+               if (sz <= ab->sz)
+                       return 0;
+               kmem_free(ab);
+               sc->buf = NULL;
+       }
+
+       /*
+        * Don't zero the buffer upon allocation to avoid runtime overhead.
+        * All users must be careful never to read uninitialized contents.
+        * The allocation covers the xchk_xattr_buf header plus sz payload
+        * bytes; only the payload size is recorded in ab->sz.
+        */
+       ab = kmem_alloc_large(sizeof(*ab) + sz, flags);
+       if (!ab)
+               return -ENOMEM;
+
+       ab->sz = sz;
+       sc->buf = ab;
+       return 0;
+}
 
 /* Set us up to scrub an inode's extended attributes. */
 int
@@ -36,19 +72,18 @@ xchk_setup_xattr(
        struct xfs_scrub        *sc,
        struct xfs_inode        *ip)
 {
-       size_t                  sz;
+       int                     error;
 
        /*
-        * Allocate the buffer without the inode lock held.  We need enough
-        * space to read every xattr value in the file or enough space to
-        * hold three copies of the xattr free space bitmap.  (Not both at
-        * the same time.)
+        * We failed to get memory while checking attrs, so this time try to
+        * get all the memory we're ever going to need.  Allocate the buffer
+        * without the inode lock held, which means we can sleep.
         */
-       sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) *
-                       BITS_TO_LONGS(sc->mp->m_attr_geo->blksize));
-       sc->buf = kmem_zalloc_large(sz, KM_SLEEP);
-       if (!sc->buf)
-               return -ENOMEM;
+       if (sc->flags & XCHK_TRY_HARDER) {
+               error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, KM_SLEEP);
+               if (error)
+                       return error;
+       }
 
        return xchk_setup_inode_contents(sc, ip, 0);
 }
@@ -83,7 +118,7 @@ xchk_xattr_listent(
        sx = container_of(context, struct xchk_xattr, context);
 
        if (xchk_should_terminate(sx->sc, &error)) {
-               context->seen_enough = 1;
+               context->seen_enough = error;
                return;
        }
 
@@ -99,6 +134,19 @@ xchk_xattr_listent(
                return;
        }
 
+       /*
+        * Try to allocate enough memory to extract the attr value.  If that
+        * doesn't work, we overload the seen_enough variable to convey
+        * the error message back to the main scrub function.
+        */
+       error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL);
+       if (error == -ENOMEM)
+               error = -EDEADLOCK;
+       if (error) {
+               context->seen_enough = error;
+               return;
+       }
+
        args.flags = ATTR_KERNOTIME;
        if (flags & XFS_ATTR_ROOT)
                args.flags |= ATTR_ROOT;
@@ -111,8 +159,8 @@ xchk_xattr_listent(
        args.namelen = namelen;
        args.hashval = xfs_da_hashname(args.name, args.namelen);
        args.trans = context->tp;
-       args.value = sx->sc->buf;
-       args.valuelen = XATTR_SIZE_MAX;
+       args.value = xchk_xattr_valuebuf(sx->sc);
+       args.valuelen = valuelen;
 
        error = xfs_attr_get_ilocked(context->dp, &args);
        if (error == -EEXIST)
@@ -125,7 +173,7 @@ xchk_xattr_listent(
                                             args.blkno);
 fail_xref:
        if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-               context->seen_enough = 1;
+               context->seen_enough = XFS_ITER_ABORT;
        return;
 }
 
@@ -170,13 +218,12 @@ xchk_xattr_check_freemap(
        unsigned long                   *map,
        struct xfs_attr3_icleaf_hdr     *leafhdr)
 {
-       unsigned long                   *freemap;
-       unsigned long                   *dstmap;
+       unsigned long                   *freemap = xchk_xattr_freemap(sc);
+       unsigned long                   *dstmap = xchk_xattr_dstmap(sc);
        unsigned int                    mapsize = sc->mp->m_attr_geo->blksize;
        int                             i;
 
        /* Construct bitmap of freemap contents. */
-       freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize);
        bitmap_zero(freemap, mapsize);
        for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
                if (!xchk_xattr_set_map(sc, freemap,
@@ -186,7 +233,6 @@ xchk_xattr_check_freemap(
        }
 
        /* Look for bits that are set in freemap and are marked in use. */
-       dstmap = freemap + BITS_TO_LONGS(mapsize);
        return bitmap_and(dstmap, freemap, map, mapsize) == 0;
 }
 
@@ -201,13 +247,13 @@ xchk_xattr_entry(
        char                            *buf_end,
        struct xfs_attr_leafblock       *leaf,
        struct xfs_attr3_icleaf_hdr     *leafhdr,
-       unsigned long                   *usedmap,
        struct xfs_attr_leaf_entry      *ent,
        int                             idx,
        unsigned int                    *usedbytes,
        __u32                           *last_hashval)
 {
        struct xfs_mount                *mp = ds->state->mp;
+       unsigned long                   *usedmap = xchk_xattr_usedmap(ds->sc);
        char                            *name_end;
        struct xfs_attr_leaf_name_local *lentry;
        struct xfs_attr_leaf_name_remote *rentry;
@@ -267,16 +313,26 @@ xchk_xattr_block(
        struct xfs_attr_leafblock       *leaf = bp->b_addr;
        struct xfs_attr_leaf_entry      *ent;
        struct xfs_attr_leaf_entry      *entries;
-       unsigned long                   *usedmap = ds->sc->buf;
+       unsigned long                   *usedmap;
        char                            *buf_end;
        size_t                          off;
        __u32                           last_hashval = 0;
        unsigned int                    usedbytes = 0;
        unsigned int                    hdrsize;
        int                             i;
+       int                             error;
 
        if (*last_checked == blk->blkno)
                return 0;
+
+       /* Allocate memory for block usage checking. */
+       error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL);
+       if (error == -ENOMEM)
+               return -EDEADLOCK;
+       if (error)
+               return error;
+       usedmap = xchk_xattr_usedmap(ds->sc);
+
        *last_checked = blk->blkno;
        bitmap_zero(usedmap, mp->m_attr_geo->blksize);
 
@@ -324,7 +380,7 @@ xchk_xattr_block(
 
                /* Check the entry and nameval. */
                xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr,
-                               usedmap, ent, i, &usedbytes, &last_hashval);
+                               ent, i, &usedbytes, &last_hashval);
 
                if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                        goto out;
@@ -464,6 +520,10 @@ xchk_xattr(
        error = xfs_attr_list_int_ilocked(&sx.context);
        if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
                goto out;
+
+       /* Did our listent function try to return any errors? */
+       if (sx.context.seen_enough < 0)
+               error = sx.context.seen_enough;
 out:
        return error;
 }
diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h
new file mode 100644 (file)
index 0000000..13a1d2e
--- /dev/null
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_SCRUB_ATTR_H__
+#define __XFS_SCRUB_ATTR_H__
+
+/*
+ * Temporary storage for online scrub and repair of extended attributes.
+ */
+struct xchk_xattr_buf {
+       /* Size of @buf, in bytes. */
+       size_t                  sz;
+
+       /*
+        * Memory buffer -- either used for extracting attr values while
+        * walking the attributes; or for computing attr block bitmaps when
+        * checking the attribute tree.
+        *
+        * Each bitmap contains enough bits to track every byte in an attr
+        * block (rounded up to the size of an unsigned long).  The attr block
+        * used space bitmap starts at the beginning of the buffer; the free
+        * space bitmap follows immediately after; and we have a third buffer
+        * for storing intermediate bitmap results.
+        */
+       uint8_t                 buf[];
+};
+
+/* A place to store attribute values; shares storage with the bitmaps. */
+static inline uint8_t *
+xchk_xattr_valuebuf(
+       struct xfs_scrub        *sc)
+{
+       struct xchk_xattr_buf   *ab = sc->buf;
+
+       return ab->buf;
+}
+
+/* Bitmap of space usage found by walking an attr leaf block; first in @buf. */
+static inline unsigned long *
+xchk_xattr_usedmap(
+       struct xfs_scrub        *sc)
+{
+       struct xchk_xattr_buf   *ab = sc->buf;
+
+       return (unsigned long *)ab->buf;
+}
+
+/* Bitmap of free space from the attr leaf block free info; after usedmap. */
+static inline unsigned long *
+xchk_xattr_freemap(
+       struct xfs_scrub        *sc)
+{
+       return xchk_xattr_usedmap(sc) +
+                       BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+}
+
+/* A bitmap used to hold temporary results; follows the freemap. */
+static inline unsigned long *
+xchk_xattr_dstmap(
+       struct xfs_scrub        *sc)
+{
+       return xchk_xattr_freemap(sc) +
+                       BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+}
+
+int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size,
+               xfs_km_flags_t flags);
+
+#endif /* __XFS_SCRUB_ATTR_H__ */
index fdadc9e1dc49ea6a245258feeda14e36e226a3b0..3d47d111be5ae9413ee28e5f1e1e8da0852b7bb0 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_btree.h"
-#include "scrub/xfs_scrub.h"
-#include "scrub/scrub.h"
-#include "scrub/common.h"
-#include "scrub/trace.h"
-#include "scrub/repair.h"
 #include "scrub/bitmap.h"
 
 /*
index a703cd58a90e678854ac220f5661b9fb55b9ee8f..1bd29fdc2ab586945251084eebca671c9ec066b6 100644 (file)
@@ -9,27 +9,19 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
 #include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
-#include "xfs_sb.h"
 #include "xfs_inode.h"
-#include "xfs_inode_fork.h"
 #include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
 #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_rmap.h"
 #include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/btree.h"
-#include "scrub/trace.h"
 
 /* Set us up with an inode's bmap. */
 int
index 117910db51b809ebeea0196182e05f0dd0c54611..f52a7b8256f96c7d5eadd58ce034c90f0b61299b 100644 (file)
@@ -9,14 +9,7 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/btree.h"
index 973aa59975e328af594cbf7d7853ae1eac80bd70..18876056e5e02af78d29f6c7a5ac24fc74c3b32c 100644 (file)
@@ -9,22 +9,16 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_inode.h"
 #include "xfs_icache.h"
-#include "xfs_itable.h"
 #include "xfs_alloc.h"
 #include "xfs_alloc_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_refcount.h"
 #include "xfs_refcount_btree.h"
 #include "xfs_rmap.h"
 #include "xfs_rmap_btree.h"
 #include "xfs_trans_priv.h"
 #include "xfs_attr.h"
 #include "xfs_reflink.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
-#include "scrub/btree.h"
 #include "scrub/repair.h"
 #include "scrub/health.h"
 
index 90527b094878971f831c78daafe2483dd99e83d2..94c4f1de1922f31ea5f9ffe395792fe3a918c531 100644 (file)
@@ -9,20 +9,12 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
-#include "xfs_sb.h"
 #include "xfs_inode.h"
-#include "xfs_inode_fork.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_attr_leaf.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
index a38a22785a1a28e6a7a50b533c1103f5caf2ebd0..1e2e11721eb993381879b2458a894d4178a3baa3 100644 (file)
@@ -9,24 +9,14 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
-#include "xfs_sb.h"
 #include "xfs_inode.h"
 #include "xfs_icache.h"
-#include "xfs_itable.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
-#include "scrub/trace.h"
 #include "scrub/dabtree.h"
 
 /* Set us up to scrub directories. */
index 07c11e3e6437c40658838a60d0cc8ca5807b2603..fc3f510c9034419465fef5b95e2ffa8a15398b6c 100644 (file)
@@ -9,22 +9,10 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_inode.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
-#include "xfs_rmap.h"
-#include "xfs_error.h"
-#include "xfs_errortag.h"
-#include "xfs_icache.h"
 #include "xfs_health.h"
-#include "xfs_bmap.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
index 23cf8e2f25db66905097246387fd8ebe1d329854..b2f602811e9dfcdc577cbc737b473624a08fe282 100644 (file)
@@ -7,18 +7,10 @@
 #include "xfs_fs.h"
 #include "xfs_shared.h"
 #include "xfs_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
 #include "xfs_sb.h"
-#include "xfs_inode.h"
 #include "xfs_health.h"
 #include "scrub/scrub.h"
-#include "scrub/health.h"
 
 /*
  * Scrub and In-Core Filesystem Health Assessments
index 9b47117180cb1e8baaa4c1ae85a72bfc285ec383..681758704fda30e6250eb3322a2199d7010d9928 100644 (file)
@@ -9,21 +9,14 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
-#include "xfs_sb.h"
 #include "xfs_inode.h"
-#include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_icache.h"
 #include "xfs_rmap.h"
-#include "xfs_log.h"
-#include "xfs_trans_priv.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/btree.h"
@@ -230,7 +223,7 @@ xchk_iallocbt_check_cluster(
        int                             error = 0;
 
        nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK,
-                       mp->m_inodes_per_cluster);
+                       M_IGEO(mp)->inodes_per_cluster);
 
        /* Map this inode cluster */
        agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base);
@@ -251,7 +244,7 @@ xchk_iallocbt_check_cluster(
         */
        ir_holemask = (irec->ir_holemask & cluster_mask);
        imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
-       imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
+       imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
        imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) <<
                        mp->m_sb.sb_inodelog;
 
@@ -276,12 +269,12 @@ xchk_iallocbt_check_cluster(
        /* If any part of this is a hole, skip it. */
        if (ir_holemask) {
                xchk_xref_is_not_owned_by(bs->sc, agbno,
-                               mp->m_blocks_per_cluster,
+                               M_IGEO(mp)->blocks_per_cluster,
                                &XFS_RMAP_OINFO_INODES);
                return 0;
        }
 
-       xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster,
+       xchk_xref_is_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster,
                        &XFS_RMAP_OINFO_INODES);
 
        /* Grab the inode cluster buffer. */
@@ -333,7 +326,7 @@ xchk_iallocbt_check_clusters(
         */
        for (cluster_base = 0;
             cluster_base < XFS_INODES_PER_CHUNK;
-            cluster_base += bs->sc->mp->m_inodes_per_cluster) {
+            cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) {
                error = xchk_iallocbt_check_cluster(bs, irec, cluster_base);
                if (error)
                        break;
@@ -355,6 +348,7 @@ xchk_iallocbt_rec_alignment(
 {
        struct xfs_mount                *mp = bs->sc->mp;
        struct xchk_iallocbt            *iabt = bs->private;
+       struct xfs_ino_geometry         *igeo = M_IGEO(mp);
 
        /*
         * finobt records have different positioning requirements than inobt
@@ -372,7 +366,7 @@ xchk_iallocbt_rec_alignment(
                unsigned int    imask;
 
                imask = min_t(unsigned int, XFS_INODES_PER_CHUNK,
-                               mp->m_cluster_align_inodes) - 1;
+                               igeo->cluster_align_inodes) - 1;
                if (irec->ir_startino & imask)
                        xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
                return;
@@ -400,17 +394,17 @@ xchk_iallocbt_rec_alignment(
        }
 
        /* inobt records must be aligned to cluster and inoalignmnt size. */
-       if (irec->ir_startino & (mp->m_cluster_align_inodes - 1)) {
+       if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) {
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
                return;
        }
 
-       if (irec->ir_startino & (mp->m_inodes_per_cluster - 1)) {
+       if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) {
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
                return;
        }
 
-       if (mp->m_inodes_per_cluster <= XFS_INODES_PER_CHUNK)
+       if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK)
                return;
 
        /*
@@ -419,7 +413,7 @@ xchk_iallocbt_rec_alignment(
         * after this one.
         */
        iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK;
-       iabt->next_cluster_ino = irec->ir_startino + mp->m_inodes_per_cluster;
+       iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster;
 }
 
 /* Scrub an inobt/finobt record. */
index e213efc194a1d6e2417f941da0be04691befb3f7..6d483ab29e6397e8084935f2046face4b624097e 100644 (file)
@@ -9,27 +9,17 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
 #include "xfs_inode.h"
-#include "xfs_icache.h"
-#include "xfs_inode_buf.h"
-#include "xfs_inode_fork.h"
 #include "xfs_ialloc.h"
 #include "xfs_da_format.h"
 #include "xfs_reflink.h"
 #include "xfs_rmap.h"
-#include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/btree.h"
-#include "scrub/trace.h"
 
 /*
  * Grab total control of the inode metadata.  It doesn't matter here if
index d5d197f1b80f92d071a2fed4b01240bc65be393a..c962bd534690789c0931be1211ddc0e4d55a1b20 100644 (file)
@@ -9,21 +9,13 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
 #include "xfs_inode.h"
 #include "xfs_icache.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
-#include "scrub/trace.h"
 
 /* Set us up to scrub parents. */
 int
index 5dfe2b5924db4f41243a9ebca0d747a28a5c6820..0a33b4421c32b1b2a239d58842b48077b312f19d 100644 (file)
@@ -9,24 +9,13 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
-#include "xfs_sb.h"
 #include "xfs_inode.h"
-#include "xfs_inode_fork.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
 #include "xfs_quota.h"
 #include "xfs_qm.h"
-#include "xfs_dquot.h"
-#include "xfs_dquot_item.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
-#include "scrub/trace.h"
 
 /* Convert a scrub type code to a DQ flag, or return 0 if error. */
 static inline uint
@@ -144,7 +133,7 @@ xchk_quota_item(
        if (bsoft > bhard)
                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
 
-       if (ihard > mp->m_maxicount)
+       if (ihard > M_IGEO(mp)->maxicount)
                xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
        if (isoft > ihard)
                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
index 708b4158eb903b40fe7d577148f4adee8bdb554d..93b3793bc5b31a91dc466dc0a904a71c549e8dd0 100644 (file)
@@ -7,22 +7,12 @@
 #include "xfs_fs.h"
 #include "xfs_shared.h"
 #include "xfs_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
 #include "xfs_rmap.h"
 #include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/btree.h"
-#include "scrub/trace.h"
 
 /*
  * Set us up to scrub reference count btrees.
index eb358f0f5e0ad1151d6d9e4d4cd7d5efb2ed58d8..4cfeec57fb05c30a0f2d5d27d7d9972ea0a5b08a 100644 (file)
@@ -9,29 +9,21 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_inode.h"
-#include "xfs_icache.h"
 #include "xfs_alloc.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_rmap.h"
 #include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
 #include "xfs_refcount_btree.h"
 #include "xfs_extent_busy.h"
 #include "xfs_ag_resv.h"
-#include "xfs_trans_space.h"
 #include "xfs_quota.h"
-#include "xfs_attr.h"
-#include "xfs_reflink.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
@@ -357,7 +349,7 @@ xrep_init_btblock(
        bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
                        XFS_FSB_TO_BB(mp, 1), 0);
        xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
-       xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0);
+       xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno);
        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
        xfs_trans_log_buf(tp, bp, 0, bp->b_length);
        bp->b_ops = ops;
@@ -672,7 +664,7 @@ xrep_findroot_agfl_walk(
 {
        xfs_agblock_t           *agbno = priv;
 
-       return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0;
+       return (*agbno == bno) ? XFS_ITER_ABORT : 0;
 }
 
 /* Does this block match the btree information passed in? */
@@ -702,7 +694,7 @@ xrep_findroot_block(
        if (owner == XFS_RMAP_OWN_AG) {
                error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
                                xrep_findroot_agfl_walk, &agbno);
-               if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+               if (error == XFS_ITER_ABORT)
                        return 0;
                if (error)
                        return error;
index 92a140c5b55e32c3b6b4620104b78cac7ddd68f3..8d4cefd761c1dc843915e91b7c26d5f5fb6449d4 100644 (file)
@@ -9,21 +9,12 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
 #include "xfs_rmap.h"
 #include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/btree.h"
-#include "scrub/trace.h"
 
 /*
  * Set us up to scrub reverse mapping btrees.
index dbe115b075f714007aef48b16e8b765629f6c284..c642bc206c41d8eb9a99aa1aff8b1561e7b5c689 100644 (file)
@@ -9,19 +9,12 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
 #include "xfs_rtalloc.h"
 #include "xfs_inode.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
-#include "scrub/trace.h"
 
 /* Set us up with the realtime metadata locked. */
 int
index f630389ee176b14a5aeecd7dfec6cfa9ce2d81b2..15c8c5f3f688d1b3e905229ab1d13e8fc9bf8685 100644 (file)
@@ -9,36 +9,16 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
-#include "xfs_sb.h"
 #include "xfs_inode.h"
-#include "xfs_icache.h"
-#include "xfs_itable.h"
-#include "xfs_alloc.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_refcount.h"
-#include "xfs_refcount_btree.h"
-#include "xfs_rmap.h"
-#include "xfs_rmap_btree.h"
 #include "xfs_quota.h"
 #include "xfs_qm.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
-#include "xfs_log.h"
-#include "xfs_trans_priv.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
-#include "scrub/btree.h"
 #include "scrub/repair.h"
 #include "scrub/health.h"
 
index f7ebaa9469997a66c9ca518f6b528ff2ae718a0f..99c0b1234c3cae488db442914729a974ae8ded63 100644 (file)
@@ -9,19 +9,11 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
 #include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
 #include "xfs_inode.h"
-#include "xfs_inode_fork.h"
 #include "xfs_symlink.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
-#include "scrub/trace.h"
 
 /* Set us up to scrub a symbolic link. */
 int
index 96feaf8dcdec5600475a2f831d0d44f61437ef87..9eaab2eb5ed3bf65d573e86a9af7e5662d3a126e 100644 (file)
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
-#include "xfs_trans.h"
-#include "xfs_bit.h"
-#include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
-#include "scrub/common.h"
 
 /* Figure out which block the btree cursor was pointing to. */
 static inline xfs_fsblock_t
index 8039e35147ddd015d228dd40a818253cb12b4da4..cbda40d40326683236022c3a30b976a72ba257ee 100644 (file)
@@ -4,16 +4,14 @@
  * All Rights Reserved.
  */
 #include "xfs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_inode.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_trace.h"
-#include <linux/slab.h>
-#include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
 
 
index 11f703d4a60568fff5c2fa3e93647ebc8424ca38..761248ee27785afe07be567cf9d4aa87705aa031 100644 (file)
 #include "xfs_mount.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
 #include "xfs_iomap.h"
 #include "xfs_trace.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_reflink.h"
-#include <linux/writeback.h>
 
 /*
  * structure owned by writepages passed to individual writepage calls
@@ -138,8 +133,7 @@ xfs_setfilesize_trans_alloc(
        struct xfs_trans        *tp;
        int                     error;
 
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
-                               XFS_TRANS_NOFS, &tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
        if (error)
                return error;
 
@@ -240,8 +234,16 @@ xfs_end_ioend(
        struct xfs_inode        *ip = XFS_I(ioend->io_inode);
        xfs_off_t               offset = ioend->io_offset;
        size_t                  size = ioend->io_size;
+       unsigned int            nofs_flag;
        int                     error;
 
+       /*
+        * We can allocate memory here while doing writeback on behalf of
+        * memory reclaim.  To avoid memory allocation deadlocks set the
+        * task-wide nofs context for the following operations.
+        */
+       nofs_flag = memalloc_nofs_save();
+
        /*
         * Just clean up the in-memory strutures if the fs has been shut down.
         */
@@ -282,6 +284,8 @@ done:
                list_del_init(&ioend->io_list);
                xfs_destroy_ioend(ioend, error);
        }
+
+       memalloc_nofs_restore(nofs_flag);
 }
 
 /*
@@ -290,13 +294,9 @@ done:
 static bool
 xfs_ioend_can_merge(
        struct xfs_ioend        *ioend,
-       int                     ioend_error,
        struct xfs_ioend        *next)
 {
-       int                     next_error;
-
-       next_error = blk_status_to_errno(next->io_bio->bi_status);
-       if (ioend_error != next_error)
+       if (ioend->io_bio->bi_status != next->io_bio->bi_status)
                return false;
        if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK))
                return false;
@@ -305,11 +305,28 @@ xfs_ioend_can_merge(
                return false;
        if (ioend->io_offset + ioend->io_size != next->io_offset)
                return false;
-       if (xfs_ioend_is_append(ioend) != xfs_ioend_is_append(next))
-               return false;
        return true;
 }
 
+/*
+ * If the ioend being merged in (@next) has a preallocated transaction for
+ * file size updates, the ioend it is merged into (@ioend) must end up with
+ * one too: hand @next's over if @ioend has none, otherwise cancel @next's,
+ * which is guaranteed to be clean.
+ */
+static void
+xfs_ioend_merge_append_transactions(
+       struct xfs_ioend        *ioend,
+       struct xfs_ioend        *next)
+{
+       if (!ioend->io_append_trans) {
+               ioend->io_append_trans = next->io_append_trans;
+               next->io_append_trans = NULL;
+       } else {
+               xfs_setfilesize_ioend(next, -ECANCELED);
+       }
+}
+
 /* Try to merge adjacent completions. */
 STATIC void
 xfs_ioend_try_merge(
@@ -317,25 +334,16 @@ xfs_ioend_try_merge(
        struct list_head        *more_ioends)
 {
        struct xfs_ioend        *next_ioend;
-       int                     ioend_error;
-       int                     error;
-
-       if (list_empty(more_ioends))
-               return;
-
-       ioend_error = blk_status_to_errno(ioend->io_bio->bi_status);
 
        while (!list_empty(more_ioends)) {
                next_ioend = list_first_entry(more_ioends, struct xfs_ioend,
                                io_list);
-               if (!xfs_ioend_can_merge(ioend, ioend_error, next_ioend))
+               if (!xfs_ioend_can_merge(ioend, next_ioend))
                        break;
                list_move_tail(&next_ioend->io_list, &ioend->io_list);
                ioend->io_size += next_ioend->io_size;
-               if (ioend->io_append_trans) {
-                       error = xfs_setfilesize_ioend(next_ioend, 1);
-                       ASSERT(error == 1);
-               }
+               if (next_ioend->io_append_trans)
+                       xfs_ioend_merge_append_transactions(ioend, next_ioend);
        }
 }
 
@@ -626,7 +634,7 @@ allocate_blocks:
  * reference to the ioend to ensure that the ioend completion is only done once
  * all bios have been submitted and the ioend is really done.
  *
- * If @fail is non-zero, it means that we have a situation where some part of
+ * If @status is non-zero, it means that we have a situation where some part of
  * the submission process has failed after we have marked paged for writeback
  * and unlocked them. In this situation, we need to fail the bio and ioend
  * rather than submit it to IO. This typically only happens on a filesystem
@@ -638,21 +646,19 @@ xfs_submit_ioend(
        struct xfs_ioend        *ioend,
        int                     status)
 {
+       unsigned int            nofs_flag;
+
+       /*
+        * We can allocate memory here while doing writeback on behalf of
+        * memory reclaim.  To avoid memory allocation deadlocks set the
+        * task-wide nofs context for the following operations.
+        */
+       nofs_flag = memalloc_nofs_save();
+
        /* Convert CoW extents to regular */
        if (!status && ioend->io_fork == XFS_COW_FORK) {
-               /*
-                * Yuk. This can do memory allocation, but is not a
-                * transactional operation so everything is done in GFP_KERNEL
-                * context. That can deadlock, because we hold pages in
-                * writeback state and GFP_KERNEL allocations can block on them.
-                * Hence we must operate in nofs conditions here.
-                */
-               unsigned nofs_flag;
-
-               nofs_flag = memalloc_nofs_save();
                status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
                                ioend->io_offset, ioend->io_size);
-               memalloc_nofs_restore(nofs_flag);
        }
 
        /* Reserve log space if we might write beyond the on-disk inode size. */
@@ -663,9 +669,10 @@ xfs_submit_ioend(
            !ioend->io_append_trans)
                status = xfs_setfilesize_trans_alloc(ioend);
 
+       memalloc_nofs_restore(nofs_flag);
+
        ioend->io_bio->bi_private = ioend;
        ioend->io_bio->bi_end_io = xfs_end_bio;
-       ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
 
        /*
         * If we are failing the IO now, just mark the ioend with an
@@ -679,7 +686,6 @@ xfs_submit_ioend(
                return status;
        }
 
-       ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
        submit_bio(ioend->io_bio);
        return 0;
 }
@@ -691,7 +697,8 @@ xfs_alloc_ioend(
        xfs_exntst_t            state,
        xfs_off_t               offset,
        struct block_device     *bdev,
-       sector_t                sector)
+       sector_t                sector,
+       struct writeback_control *wbc)
 {
        struct xfs_ioend        *ioend;
        struct bio              *bio;
@@ -699,6 +706,9 @@ xfs_alloc_ioend(
        bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
        bio_set_dev(bio, bdev);
        bio->bi_iter.bi_sector = sector;
+       bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+       bio->bi_write_hint = inode->i_write_hint;
+       wbc_init_bio(wbc, bio);
 
        ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
        INIT_LIST_HEAD(&ioend->io_list);
@@ -719,24 +729,22 @@ xfs_alloc_ioend(
  * so that the bi_private linkage is set up in the right direction for the
  * traversal in xfs_destroy_ioend().
  */
-static void
+static struct bio *
 xfs_chain_bio(
-       struct xfs_ioend        *ioend,
-       struct writeback_control *wbc,
-       struct block_device     *bdev,
-       sector_t                sector)
+       struct bio              *prev)
 {
        struct bio *new;
 
        new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
-       bio_set_dev(new, bdev);
-       new->bi_iter.bi_sector = sector;
-       bio_chain(ioend->io_bio, new);
-       bio_get(ioend->io_bio);         /* for xfs_destroy_ioend */
-       ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
-       ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
-       submit_bio(ioend->io_bio);
-       ioend->io_bio = new;
+       bio_copy_dev(new, prev);/* also copies over blkcg information */
+       new->bi_iter.bi_sector = bio_end_sector(prev);
+       new->bi_opf = prev->bi_opf;
+       new->bi_write_hint = prev->bi_write_hint;
+
+       bio_chain(prev, new);
+       bio_get(prev);          /* for xfs_destroy_ioend */
+       submit_bio(prev);
+       return new;
 }
 
 /*
@@ -772,7 +780,7 @@ xfs_add_to_ioend(
                if (wpc->ioend)
                        list_add(&wpc->ioend->io_list, iolist);
                wpc->ioend = xfs_alloc_ioend(inode, wpc->fork,
-                               wpc->imap.br_state, offset, bdev, sector);
+                               wpc->imap.br_state, offset, bdev, sector, wbc);
        }
 
        merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
@@ -783,11 +791,12 @@ xfs_add_to_ioend(
 
        if (!merged) {
                if (bio_full(wpc->ioend->io_bio, len))
-                       xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
+                       wpc->ioend->io_bio = xfs_chain_bio(wpc->ioend->io_bio);
                bio_add_page(wpc->ioend->io_bio, page, len, poff);
        }
 
        wpc->ioend->io_size += len;
+       wbc_account_io(wbc, page, len);
 }
 
 STATIC void
index f62b03186c62967bbc8d08e541e318989a6d1503..45a1ea240cbbb0a0b3ded8fcd982169b11789c72 100644 (file)
@@ -28,7 +28,6 @@ extern const struct address_space_operations xfs_dax_aops;
 
 int    xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
-extern void xfs_count_page_state(struct page *, int *, int *);
 extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
 extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *);
 
index 228821b2ebe0195db8be0a59d9eda366515f9a03..dc93c51c17de962794ad688156e0c5cc389de920 100644 (file)
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
 #include "xfs_inode.h"
-#include "xfs_alloc.h"
 #include "xfs_attr_remote.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
-#include "xfs_error.h"
 #include "xfs_quota.h"
-#include "xfs_trace.h"
 #include "xfs_dir2.h"
-#include "xfs_defer.h"
 
 /*
  * Look at all the extents for this logical region,
@@ -121,7 +116,7 @@ xfs_attr3_leaf_inactive(
        int                     size;
        int                     tmp;
        int                     i;
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
 
        leaf = bp->b_addr;
        xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
index 3d213a7394c5b747dfb5cffc17dfb3d44d66cf03..58fc820a70c6fc6ed131393e50de79e25095a534 100644 (file)
@@ -6,25 +6,20 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
 #include "xfs_mount.h"
 #include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
 #include "xfs_attr.h"
 #include "xfs_attr_sf.h"
-#include "xfs_attr_remote.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
-#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
 #include "xfs_dir2.h"
 
 STATIC int
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
new file mode 100644 (file)
index 0000000..e2148f2
--- /dev/null
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Christoph Hellwig.
+ */
+#include "xfs.h"
+
+/*
+ * Number of bio vecs to ask for when transferring @count bytes, capped at
+ * BIO_MAX_PAGES.  This only sizes the allocation; xfs_rw_bdev() chains a
+ * further bio whenever bio_add_page() cannot take the next page.
+ */
+static inline unsigned int bio_max_vecs(unsigned int count)
+{
+       return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES);
+}
+
+/*
+ * Read or write @count bytes between the kernel buffer @data and @bdev,
+ * starting at @sector, and wait for the I/O to complete.
+ *
+ * @op is compared against REQ_OP_READ and REQ_OP_WRITE; REQ_META and REQ_SYNC
+ * are always added to it.  @data may be a vmalloc address, in which case the
+ * kernel vmap range is flushed before a write and invalidated after a read.
+ *
+ * Returns the result of submit_bio_wait() on the final bio of the chain.
+ */
+int
+xfs_rw_bdev(
+       struct block_device     *bdev,
+       sector_t                sector,
+       unsigned int            count,
+       char                    *data,
+       unsigned int            op)
+{
+       unsigned int            is_vmalloc = is_vmalloc_addr(data);
+       unsigned int            left = count;
+       char                    *start = data;
+       int                     error;
+       struct bio              *bio;
+
+       if (is_vmalloc && op == REQ_OP_WRITE)
+               flush_kernel_vmap_range(data, count);
+
+       bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
+       bio_set_dev(bio, bdev);
+       bio->bi_iter.bi_sector = sector;
+       bio->bi_opf = op | REQ_META | REQ_SYNC;
+
+       do {
+               struct page     *page = kmem_to_page(data);
+               unsigned int    off = offset_in_page(data);
+               unsigned int    len = min_t(unsigned, left, PAGE_SIZE - off);
+
+               /*
+                * The page did not fit: chain a new bio that starts where
+                * the previous one ends, then submit the previous one.
+                */
+               while (bio_add_page(bio, page, len, off) != len) {
+                       struct bio      *prev = bio;
+
+                       bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
+                       bio_copy_dev(bio, prev);
+                       bio->bi_iter.bi_sector = bio_end_sector(prev);
+                       bio->bi_opf = prev->bi_opf;
+                       bio_chain(prev, bio);
+
+                       submit_bio(prev);
+               }
+
+               data += len;
+               left -= len;
+       } while (left > 0);
+
+       error = submit_bio_wait(bio);
+       bio_put(bio);
+
+       /*
+        * The loop above advanced @data to the end of the buffer, so the
+        * invalidation has to use the saved start address.
+        */
+       if (is_vmalloc && op == REQ_OP_READ)
+               invalidate_kernel_vmap_range(start, count);
+       return error;
+}
index ce45f066995ebec7c89cdce43c57db89bd29e96f..9fa4a7ee8cfc2ebb86fe5fc1216dd92b7e19f672 100644 (file)
@@ -9,17 +9,16 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
+#include "xfs_shared.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
 #include "xfs_bmap_item.h"
 #include "xfs_log.h"
 #include "xfs_bmap.h"
 #include "xfs_icache.h"
-#include "xfs_trace.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_trans_space.h"
 
@@ -95,15 +94,6 @@ xfs_bui_item_format(
                        xfs_bui_log_format_sizeof(buip->bui_format.bui_nextents));
 }
 
-/*
- * Pinning has no meaning for an bui item, so just return.
- */
-STATIC void
-xfs_bui_item_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
 /*
  * The unpin operation is the last place an BUI is manipulated in the log. It is
  * either inserted in the AIL or aborted in the event of a log I/O error. In
@@ -122,72 +112,23 @@ xfs_bui_item_unpin(
        xfs_bui_release(buip);
 }
 
-/*
- * BUI items have no locking or pushing.  However, since BUIs are pulled from
- * the AIL when their corresponding BUDs are committed to disk, their situation
- * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log.  This should help in getting the BUI out of
- * the AIL.
- */
-STATIC uint
-xfs_bui_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
-{
-       return XFS_ITEM_PINNED;
-}
-
 /*
  * The BUI has been either committed or aborted if the transaction has been
  * cancelled. If the transaction was cancelled, an BUD isn't going to be
  * constructed and thus we free the BUI here directly.
  */
 STATIC void
-xfs_bui_item_unlock(
+xfs_bui_item_release(
        struct xfs_log_item     *lip)
 {
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
-               xfs_bui_release(BUI_ITEM(lip));
-}
-
-/*
- * The BUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_bui_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
+       xfs_bui_release(BUI_ITEM(lip));
 }
 
-/*
- * The BUI dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_bui_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all bui log items.
- */
 static const struct xfs_item_ops xfs_bui_item_ops = {
        .iop_size       = xfs_bui_item_size,
        .iop_format     = xfs_bui_item_format,
-       .iop_pin        = xfs_bui_item_pin,
        .iop_unpin      = xfs_bui_item_unpin,
-       .iop_unlock     = xfs_bui_item_unlock,
-       .iop_committed  = xfs_bui_item_committed,
-       .iop_push       = xfs_bui_item_push,
-       .iop_committing = xfs_bui_item_committing,
+       .iop_release    = xfs_bui_item_release,
 };
 
 /*
@@ -249,126 +190,241 @@ xfs_bud_item_format(
 }
 
 /*
- * Pinning has no meaning for an bud item, so just return.
+ * The BUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the BUI and free the
+ * BUD.
  */
 STATIC void
-xfs_bud_item_pin(
+xfs_bud_item_release(
        struct xfs_log_item     *lip)
 {
+       struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+
+       xfs_bui_release(budp->bud_buip);
+       kmem_zone_free(xfs_bud_zone, budp);
 }
 
-/*
- * Since pinning has no meaning for an bud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_bud_item_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
+static const struct xfs_item_ops xfs_bud_item_ops = {
+       .flags          = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+       .iop_size       = xfs_bud_item_size,
+       .iop_format     = xfs_bud_item_format,
+       .iop_release    = xfs_bud_item_release,
+};
+
+static struct xfs_bud_log_item *
+xfs_trans_get_bud(
+       struct xfs_trans                *tp,
+       struct xfs_bui_log_item         *buip)
 {
+       struct xfs_bud_log_item         *budp;
+
+       budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
+       xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD,
+                         &xfs_bud_item_ops);
+       budp->bud_buip = buip;
+       budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
+
+       xfs_trans_add_item(tp, &budp->bud_item);
+       return budp;
 }
 
 /*
- * There isn't much you can do to push on an bud item.  It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Finish a bmap update and log it to the BUD. Note that the
+ * transaction is marked dirty regardless of whether the bmap update
+ * succeeds or fails to support the BUI/BUD lifecycle rules.
  */
-STATIC uint
-xfs_bud_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
+static int
+xfs_trans_log_finish_bmap_update(
+       struct xfs_trans                *tp,
+       struct xfs_bud_log_item         *budp,
+       enum xfs_bmap_intent_type       type,
+       struct xfs_inode                *ip,
+       int                             whichfork,
+       xfs_fileoff_t                   startoff,
+       xfs_fsblock_t                   startblock,
+       xfs_filblks_t                   *blockcount,
+       xfs_exntst_t                    state)
 {
-       return XFS_ITEM_PINNED;
+       int                             error;
+
+       error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
+                       startblock, blockcount, state);
+
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the BUI and frees the BUD
+        * 2.) shuts down the filesystem
+        */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
+
+       return error;
 }
 
-/*
- * The BUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the BUI and free the
- * BUD.
- */
-STATIC void
-xfs_bud_item_unlock(
-       struct xfs_log_item     *lip)
+/* Sort bmap intents by inode; three-way compare avoids int truncation. */
+static int
+xfs_bmap_update_diff_items(
+       void                            *priv,
+       struct list_head                *a,
+       struct list_head                *b)
 {
-       struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+       struct xfs_inode                *ia;
+       struct xfs_inode                *ib;
 
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
-               xfs_bui_release(budp->bud_buip);
-               kmem_zone_free(xfs_bud_zone, budp);
-       }
+       ia = container_of(a, struct xfs_bmap_intent, bi_list)->bi_owner;
+       ib = container_of(b, struct xfs_bmap_intent, bi_list)->bi_owner;
+       return (ia->i_ino > ib->i_ino) - (ia->i_ino < ib->i_ino);
 }
 
-/*
- * When the bud item is committed to disk, all we need to do is delete our
- * reference to our partner bui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_bud_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+/* Get a BUI. */
+STATIC void *
+xfs_bmap_update_create_intent(
+       struct xfs_trans                *tp,
+       unsigned int                    count)
 {
-       struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+       struct xfs_bui_log_item         *buip;
+
+       ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
+       ASSERT(tp != NULL);
+
+       buip = xfs_bui_init(tp->t_mountp);
+       ASSERT(buip != NULL);
 
        /*
-        * Drop the BUI reference regardless of whether the BUD has been
-        * aborted. Once the BUD transaction is constructed, it is the sole
-        * responsibility of the BUD to release the BUI (even if the BUI is
-        * aborted due to log I/O error).
+        * Get a log_item_desc to point at the new item.
         */
-       xfs_bui_release(budp->bud_buip);
-       kmem_zone_free(xfs_bud_zone, budp);
+       xfs_trans_add_item(tp, &buip->bui_item);
+       return buip;
+}
 
-       return (xfs_lsn_t)-1;
+/* Set the map extent flags for this mapping. */
+static void
+xfs_trans_set_bmap_flags(
+       struct xfs_map_extent           *bmap,
+       enum xfs_bmap_intent_type       type,
+       int                             whichfork,
+       xfs_exntst_t                    state)
+{
+       bmap->me_flags = 0;
+       switch (type) {
+       case XFS_BMAP_MAP:
+       case XFS_BMAP_UNMAP:
+               bmap->me_flags = type;
+               break;
+       default:
+               ASSERT(0);
+       }
+       if (state == XFS_EXT_UNWRITTEN)
+               bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN;
+       if (whichfork == XFS_ATTR_FORK)
+               bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK;
 }
 
-/*
- * The BUD dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Log bmap updates in the intent item. */
 STATIC void
-xfs_bud_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+xfs_bmap_update_log_item(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       struct list_head                *item)
 {
+       struct xfs_bui_log_item         *buip = intent;
+       struct xfs_bmap_intent          *bmap;
+       uint                            next_extent;
+       struct xfs_map_extent           *map;
+
+       bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
+
+       /*
+        * atomic_inc_return gives us the value after the increment;
+        * we want to use it as an array index so we need to subtract 1 from
+        * it.
+        */
+       next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
+       ASSERT(next_extent < buip->bui_format.bui_nextents);
+       map = &buip->bui_format.bui_extents[next_extent];
+       map->me_owner = bmap->bi_owner->i_ino;
+       map->me_startblock = bmap->bi_bmap.br_startblock;
+       map->me_startoff = bmap->bi_bmap.br_startoff;
+       map->me_len = bmap->bi_bmap.br_blockcount;
+       xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork,
+                       bmap->bi_bmap.br_state);
 }
 
-/*
- * This is the ops vector shared by all bud log items.
- */
-static const struct xfs_item_ops xfs_bud_item_ops = {
-       .iop_size       = xfs_bud_item_size,
-       .iop_format     = xfs_bud_item_format,
-       .iop_pin        = xfs_bud_item_pin,
-       .iop_unpin      = xfs_bud_item_unpin,
-       .iop_unlock     = xfs_bud_item_unlock,
-       .iop_committed  = xfs_bud_item_committed,
-       .iop_push       = xfs_bud_item_push,
-       .iop_committing = xfs_bud_item_committing,
-};
+/* Get a BUD so we can process all the deferred bmap updates. */
+STATIC void *
+xfs_bmap_update_create_done(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       unsigned int                    count)
+{
+       return xfs_trans_get_bud(tp, intent);
+}
 
-/*
- * Allocate and initialize an bud item with the given number of extents.
- */
-struct xfs_bud_log_item *
-xfs_bud_init(
-       struct xfs_mount                *mp,
-       struct xfs_bui_log_item         *buip)
+/* Process a deferred bmap update. */
+STATIC int
+xfs_bmap_update_finish_item(
+       struct xfs_trans                *tp,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
+{
+       struct xfs_bmap_intent          *bmap;
+       xfs_filblks_t                   count;
+       int                             error;
+
+       bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+       count = bmap->bi_bmap.br_blockcount;
+       error = xfs_trans_log_finish_bmap_update(tp, done_item,
+                       bmap->bi_type,
+                       bmap->bi_owner, bmap->bi_whichfork,
+                       bmap->bi_bmap.br_startoff,
+                       bmap->bi_bmap.br_startblock,
+                       &count,
+                       bmap->bi_bmap.br_state);
+       if (!error && count > 0) {
+               ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
+               bmap->bi_bmap.br_blockcount = count;
+               return -EAGAIN;
+       }
+       kmem_free(bmap);
+       return error;
+}
 
+/* Abort all pending BUIs. */
+STATIC void
+xfs_bmap_update_abort_intent(
+       void                            *intent)
 {
-       struct xfs_bud_log_item *budp;
+       xfs_bui_release(intent);
+}
 
-       budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
-       xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops);
-       budp->bud_buip = buip;
-       budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
+/* Cancel a deferred bmap update. */
+STATIC void
+xfs_bmap_update_cancel_item(
+       struct list_head                *item)
+{
+       struct xfs_bmap_intent          *bmap;
 
-       return budp;
+       bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+       kmem_free(bmap);
 }
 
+const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
+       .max_items      = XFS_BUI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_bmap_update_diff_items,
+       .create_intent  = xfs_bmap_update_create_intent,
+       .abort_intent   = xfs_bmap_update_abort_intent,
+       .log_item       = xfs_bmap_update_log_item,
+       .create_done    = xfs_bmap_update_create_done,
+       .finish_item    = xfs_bmap_update_finish_item,
+       .cancel_item    = xfs_bmap_update_cancel_item,
+};
+
 /*
  * Process a bmap update intent item that was recovered from the log.
  * We need to update some inode's bmbt.
index 89e043a88bb81c078c43a0b173da745e6a3f5186..ad479cc73de84b52d8ae4b866fc075fcdc03066f 100644 (file)
@@ -75,8 +75,6 @@ extern struct kmem_zone       *xfs_bui_zone;
 extern struct kmem_zone        *xfs_bud_zone;
 
 struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *);
-struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *,
-               struct xfs_bui_log_item *);
 void xfs_bui_item_free(struct xfs_bui_log_item *);
 void xfs_bui_release(struct xfs_bui_log_item *);
 int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
index 06d07f1e310b063db9b4ffd8d393f22d73d6115a..98c6a7a714276b11fe2cb373bca5c50c8e18465a 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
 #include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_trans.h"
-#include "xfs_extfree_item.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
 #include "xfs_trans_space.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
-#include "xfs_log.h"
-#include "xfs_rmap_btree.h"
 #include "xfs_iomap.h"
 #include "xfs_reflink.h"
-#include "xfs_refcount.h"
 
 /* Kernel only BMAP related definitions and functions */
 
@@ -276,7 +271,7 @@ xfs_bmap_count_tree(
        struct xfs_btree_block  *block, *nextblock;
        int                     numrecs;
 
-       error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
+       error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF,
                                                &xfs_bmbt_buf_ops);
        if (error)
                return error;
@@ -287,7 +282,7 @@ xfs_bmap_count_tree(
                /* Not at node above leaves, count this level of nodes */
                nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
                while (nextbno != NULLFSBLOCK) {
-                       error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
+                       error = xfs_btree_read_bufl(mp, tp, nextbno, &nbp,
                                                XFS_BMAP_BTREE_REF,
                                                &xfs_bmbt_buf_ops);
                        if (error)
@@ -321,7 +316,7 @@ xfs_bmap_count_tree(
                        if (nextbno == NULLFSBLOCK)
                                break;
                        bno = nextbno;
-                       error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                       error = xfs_btree_read_bufl(mp, tp, bno, &bp,
                                                XFS_BMAP_BTREE_REF,
                                                &xfs_bmbt_buf_ops);
                        if (error)
index 548344e2512833bbb82f141fe34aefed88a6729e..ca0849043f542657a0a4d7f531cf3f564421b8c9 100644 (file)
@@ -4,24 +4,9 @@
  * All Rights Reserved.
  */
 #include "xfs.h"
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/bio.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/workqueue.h>
-#include <linux/percpu.h>
-#include <linux/blkdev.h>
-#include <linux/hash.h>
-#include <linux/kthread.h>
-#include <linux/migrate.h>
 #include <linux/backing-dev.h>
-#include <linux/freezer.h>
 
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
@@ -213,7 +198,7 @@ xfs_buf_free_maps(
        }
 }
 
-struct xfs_buf *
+static struct xfs_buf *
 _xfs_buf_alloc(
        struct xfs_buftarg      *target,
        struct xfs_buf_map      *map,
@@ -243,6 +228,7 @@ _xfs_buf_alloc(
        sema_init(&bp->b_sema, 0); /* held, no waiters */
        spin_lock_init(&bp->b_lock);
        bp->b_target = target;
+       bp->b_mount = target->bt_mount;
        bp->b_flags = flags;
 
        /*
@@ -263,12 +249,11 @@ _xfs_buf_alloc(
                bp->b_maps[i].bm_len = map[i].bm_len;
                bp->b_length += map[i].bm_len;
        }
-       bp->b_io_length = bp->b_length;
 
        atomic_set(&bp->b_pin_count, 0);
        init_waitqueue_head(&bp->b_waiters);
 
-       XFS_STATS_INC(target->bt_mount, xb_create);
+       XFS_STATS_INC(bp->b_mount, xb_create);
        trace_xfs_buf_init(bp, _RET_IP_);
 
        return bp;
@@ -425,12 +410,12 @@ retry:
                                        current->comm, current->pid,
                                        __func__, gfp_mask);
 
-                       XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries);
+                       XFS_STATS_INC(bp->b_mount, xb_page_retries);
                        congestion_wait(BLK_RW_ASYNC, HZ/50);
                        goto retry;
                }
 
-               XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found);
+               XFS_STATS_INC(bp->b_mount, xb_page_found);
 
                nbytes = min_t(size_t, size, PAGE_SIZE - offset);
                size -= nbytes;
@@ -909,83 +894,6 @@ xfs_buf_read_uncached(
        return 0;
 }
 
-/*
- * Return a buffer allocated as an empty buffer and associated to external
- * memory via xfs_buf_associate_memory() back to it's empty state.
- */
-void
-xfs_buf_set_empty(
-       struct xfs_buf          *bp,
-       size_t                  numblks)
-{
-       if (bp->b_pages)
-               _xfs_buf_free_pages(bp);
-
-       bp->b_pages = NULL;
-       bp->b_page_count = 0;
-       bp->b_addr = NULL;
-       bp->b_length = numblks;
-       bp->b_io_length = numblks;
-
-       ASSERT(bp->b_map_count == 1);
-       bp->b_bn = XFS_BUF_DADDR_NULL;
-       bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
-       bp->b_maps[0].bm_len = bp->b_length;
-}
-
-static inline struct page *
-mem_to_page(
-       void                    *addr)
-{
-       if ((!is_vmalloc_addr(addr))) {
-               return virt_to_page(addr);
-       } else {
-               return vmalloc_to_page(addr);
-       }
-}
-
-int
-xfs_buf_associate_memory(
-       xfs_buf_t               *bp,
-       void                    *mem,
-       size_t                  len)
-{
-       int                     rval;
-       int                     i = 0;
-       unsigned long           pageaddr;
-       unsigned long           offset;
-       size_t                  buflen;
-       int                     page_count;
-
-       pageaddr = (unsigned long)mem & PAGE_MASK;
-       offset = (unsigned long)mem - pageaddr;
-       buflen = PAGE_ALIGN(len + offset);
-       page_count = buflen >> PAGE_SHIFT;
-
-       /* Free any previous set of page pointers */
-       if (bp->b_pages)
-               _xfs_buf_free_pages(bp);
-
-       bp->b_pages = NULL;
-       bp->b_addr = mem;
-
-       rval = _xfs_buf_get_pages(bp, page_count);
-       if (rval)
-               return rval;
-
-       bp->b_offset = offset;
-
-       for (i = 0; i < bp->b_page_count; i++) {
-               bp->b_pages[i] = mem_to_page((void *)pageaddr);
-               pageaddr += PAGE_SIZE;
-       }
-
-       bp->b_io_length = BTOBB(len);
-       bp->b_length = BTOBB(buflen);
-
-       return 0;
-}
-
 xfs_buf_t *
 xfs_buf_get_uncached(
        struct xfs_buftarg      *target,
@@ -1180,7 +1088,7 @@ xfs_buf_lock(
        trace_xfs_buf_lock(bp, _RET_IP_);
 
        if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-               xfs_log_force(bp->b_target->bt_mount, 0);
+               xfs_log_force(bp->b_mount, 0);
        down(&bp->b_sema);
 
        trace_xfs_buf_lock_done(bp, _RET_IP_);
@@ -1269,7 +1177,7 @@ xfs_buf_ioend_async(
        struct xfs_buf  *bp)
 {
        INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
-       queue_work(bp->b_ioend_wq, &bp->b_ioend_work);
+       queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
 }
 
 void
@@ -1288,7 +1196,7 @@ xfs_buf_ioerror_alert(
        struct xfs_buf          *bp,
        const char              *func)
 {
-       xfs_alert(bp->b_target->bt_mount,
+       xfs_alert(bp->b_mount,
 "metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
                        func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
                        -bp->b_error);
@@ -1307,10 +1215,8 @@ xfs_bwrite(
                         XBF_WRITE_FAIL | XBF_DONE);
 
        error = xfs_buf_submit(bp);
-       if (error) {
-               xfs_force_shutdown(bp->b_target->bt_mount,
-                                  SHUTDOWN_META_IO_ERROR);
-       }
+       if (error)
+               xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
        return error;
 }
 
@@ -1436,21 +1342,8 @@ _xfs_buf_ioapply(
         */
        bp->b_error = 0;
 
-       /*
-        * Initialize the I/O completion workqueue if we haven't yet or the
-        * submitter has not opted to specify a custom one.
-        */
-       if (!bp->b_ioend_wq)
-               bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue;
-
        if (bp->b_flags & XBF_WRITE) {
                op = REQ_OP_WRITE;
-               if (bp->b_flags & XBF_SYNCIO)
-                       op_flags = REQ_SYNC;
-               if (bp->b_flags & XBF_FUA)
-                       op_flags |= REQ_FUA;
-               if (bp->b_flags & XBF_FLUSH)
-                       op_flags |= REQ_PREFLUSH;
 
                /*
                 * Run the write verifier callback function if it exists. If
@@ -1460,12 +1353,12 @@ _xfs_buf_ioapply(
                if (bp->b_ops) {
                        bp->b_ops->verify_write(bp);
                        if (bp->b_error) {
-                               xfs_force_shutdown(bp->b_target->bt_mount,
+                               xfs_force_shutdown(bp->b_mount,
                                                   SHUTDOWN_CORRUPT_INCORE);
                                return;
                        }
                } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
-                       struct xfs_mount *mp = bp->b_target->bt_mount;
+                       struct xfs_mount *mp = bp->b_mount;
 
                        /*
                         * non-crc filesystems don't attach verifiers during
@@ -1497,7 +1390,7 @@ _xfs_buf_ioapply(
         * subsequent call.
         */
        offset = bp->b_offset;
-       size = BBTOB(bp->b_io_length);
+       size = BBTOB(bp->b_length);
        blk_start_plug(&plug);
        for (i = 0; i < bp->b_map_count; i++) {
                xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
@@ -1543,7 +1436,7 @@ __xfs_buf_submit(
        ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 
        /* on shutdown we stale and complete the buffer immediately */
-       if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+       if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
                xfs_buf_ioerror(bp, -EIO);
                bp->b_flags &= ~XBF_DONE;
                xfs_buf_stale(bp);
@@ -1613,16 +1506,11 @@ xfs_buf_offset(
        return page_address(page) + (offset & (PAGE_SIZE-1));
 }
 
-/*
- *     Move data into or out of a buffer.
- */
 void
-xfs_buf_iomove(
-       xfs_buf_t               *bp,    /* buffer to process            */
-       size_t                  boff,   /* starting buffer offset       */
-       size_t                  bsize,  /* length to copy               */
-       void                    *data,  /* data address                 */
-       xfs_buf_rw_t            mode)   /* read/write/zero flag         */
+xfs_buf_zero(
+       struct xfs_buf          *bp,
+       size_t                  boff,
+       size_t                  bsize)
 {
        size_t                  bend;
 
@@ -1635,23 +1523,13 @@ xfs_buf_iomove(
                page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
                page = bp->b_pages[page_index];
                csize = min_t(size_t, PAGE_SIZE - page_offset,
-                                     BBTOB(bp->b_io_length) - boff);
+                                     BBTOB(bp->b_length) - boff);
 
                ASSERT((csize + page_offset) <= PAGE_SIZE);
 
-               switch (mode) {
-               case XBRW_ZERO:
-                       memset(page_address(page) + page_offset, 0, csize);
-                       break;
-               case XBRW_READ:
-                       memcpy(data, page_address(page) + page_offset, csize);
-                       break;
-               case XBRW_WRITE:
-                       memcpy(page_address(page) + page_offset, data, csize);
-               }
+               memset(page_address(page) + page_offset, 0, csize);
 
                boff += csize;
-               data += csize;
        }
 }
 
@@ -2198,8 +2076,7 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
         * This allows userspace to disrupt buffer caching for debug/testing
         * purposes.
         */
-       if (XFS_TEST_ERROR(false, bp->b_target->bt_mount,
-                          XFS_ERRTAG_BUF_LRU_REF))
+       if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
                lru_ref = 0;
 
        atomic_set(&bp->b_lru_ref, lru_ref);
@@ -2215,7 +2092,7 @@ xfs_verify_magic(
        struct xfs_buf          *bp,
        __be32                  dmagic)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        int                     idx;
 
        idx = xfs_sb_version_hascrc(&mp->m_sb);
@@ -2233,7 +2110,7 @@ xfs_verify_magic16(
        struct xfs_buf          *bp,
        __be16                  dmagic)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        int                     idx;
 
        idx = xfs_sb_version_hascrc(&mp->m_sb);
index d0b96e071cec197a39ea7cf4c67f777f1bebb046..c6e57a3f409ee7855474bb323d5037a6f96ab97b 100644 (file)
 
 #define XFS_BUF_DADDR_NULL     ((xfs_daddr_t) (-1LL))
 
-typedef enum {
-       XBRW_READ = 1,                  /* transfer into target memory */
-       XBRW_WRITE = 2,                 /* transfer from target memory */
-       XBRW_ZERO = 3,                  /* Zero target memory */
-} xfs_buf_rw_t;
-
 #define XBF_READ        (1 << 0) /* buffer intended for reading from device */
 #define XBF_WRITE       (1 << 1) /* buffer intended for writing to device */
 #define XBF_READ_AHEAD  (1 << 2) /* asynchronous read-ahead */
@@ -34,12 +28,7 @@ typedef enum {
 #define XBF_ASYNC       (1 << 4) /* initiator will not wait for completion */
 #define XBF_DONE        (1 << 5) /* all pages in the buffer uptodate */
 #define XBF_STALE       (1 << 6) /* buffer has been staled, do not find it */
-#define XBF_WRITE_FAIL  (1 << 24)/* async writes have failed on this buffer */
-
-/* I/O hints for the BIO layer */
-#define XBF_SYNCIO      (1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA                 (1 << 11)/* force cache write through mode */
-#define XBF_FLUSH       (1 << 12)/* flush the disk cache before a write */
+#define XBF_WRITE_FAIL  (1 << 7) /* async writes have failed on this buffer */
 
 /* flags used only as arguments to access routines */
 #define XBF_TRYLOCK     (1 << 16)/* lock requested, but do not wait */
@@ -49,7 +38,6 @@ typedef enum {
 #define _XBF_PAGES      (1 << 20)/* backed by refcounted pages */
 #define _XBF_KMEM       (1 << 21)/* backed by heap memory */
 #define _XBF_DELWRI_Q   (1 << 22)/* buffer on a delwri queue */
-#define _XBF_COMPOUND   (1 << 23)/* compound buffer */
 
 typedef unsigned int xfs_buf_flags_t;
 
@@ -62,15 +50,11 @@ typedef unsigned int xfs_buf_flags_t;
        { XBF_DONE,             "DONE" }, \
        { XBF_STALE,            "STALE" }, \
        { XBF_WRITE_FAIL,       "WRITE_FAIL" }, \
-       { XBF_SYNCIO,           "SYNCIO" }, \
-       { XBF_FUA,              "FUA" }, \
-       { XBF_FLUSH,            "FLUSH" }, \
        { XBF_TRYLOCK,          "TRYLOCK" },    /* should never be set */\
        { XBF_UNMAPPED,         "UNMAPPED" },   /* ditto */\
        { _XBF_PAGES,           "PAGES" }, \
        { _XBF_KMEM,            "KMEM" }, \
-       { _XBF_DELWRI_Q,        "DELWRI_Q" }, \
-       { _XBF_COMPOUND,        "COMPOUND" }
+       { _XBF_DELWRI_Q,        "DELWRI_Q" }
 
 
 /*
@@ -161,13 +145,13 @@ typedef struct xfs_buf {
        wait_queue_head_t       b_waiters;      /* unpin waiters */
        struct list_head        b_list;
        struct xfs_perag        *b_pag;         /* contains rbtree root */
+       struct xfs_mount        *b_mount;
        xfs_buftarg_t           *b_target;      /* buffer target (device) */
        void                    *b_addr;        /* virtual address of buffer */
        struct work_struct      b_ioend_work;
-       struct workqueue_struct *b_ioend_wq;    /* I/O completion wq */
        xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
        struct completion       b_iowait;       /* queue for I/O waiters */
-       void                    *b_log_item;
+       struct xfs_buf_log_item *b_log_item;
        struct list_head        b_li_list;      /* Log items list head */
        struct xfs_trans        *b_transp;
        struct page             **b_pages;      /* array of page pointers */
@@ -175,7 +159,6 @@ typedef struct xfs_buf {
        struct xfs_buf_map      *b_maps;        /* compound buffer map */
        struct xfs_buf_map      __b_map;        /* inline compound buffer map */
        int                     b_map_count;
-       int                     b_io_length;    /* IO size in BBs */
        atomic_t                b_pin_count;    /* pin count */
        atomic_t                b_io_remaining; /* #outstanding I/O requests */
        unsigned int            b_page_count;   /* size of page array */
@@ -209,21 +192,6 @@ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target,
                           xfs_daddr_t blkno, size_t numblks,
                           xfs_buf_flags_t flags);
 
-struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target,
-                              struct xfs_buf_map *map, int nmaps,
-                              xfs_buf_flags_t flags);
-
-static inline struct xfs_buf *
-xfs_buf_alloc(
-       struct xfs_buftarg      *target,
-       xfs_daddr_t             blkno,
-       size_t                  numblks,
-       xfs_buf_flags_t         flags)
-{
-       DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
-       return _xfs_buf_alloc(target, &map, 1, flags);
-}
-
 struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target,
                               struct xfs_buf_map *map, int nmaps,
                               xfs_buf_flags_t flags);
@@ -239,11 +207,10 @@ static inline struct xfs_buf *
 xfs_buf_get(
        struct xfs_buftarg      *target,
        xfs_daddr_t             blkno,
-       size_t                  numblks,
-       xfs_buf_flags_t         flags)
+       size_t                  numblks)
 {
        DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
-       return xfs_buf_get_map(target, &map, 1, flags);
+       return xfs_buf_get_map(target, &map, 1, 0);
 }
 
 static inline struct xfs_buf *
@@ -269,9 +236,6 @@ xfs_buf_readahead(
        return xfs_buf_readahead_map(target, &map, 1, ops);
 }
 
-void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
-int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
-
 struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
                                int flags);
 int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
@@ -305,10 +269,7 @@ static inline int xfs_buf_submit(struct xfs_buf *bp)
        return __xfs_buf_submit(bp, wait);
 }
 
-extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
-                               xfs_buf_rw_t);
-#define xfs_buf_zero(bp, off, len) \
-           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
+void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize);
 
 /* Buffer Utility Routines */
 extern void *xfs_buf_offset(struct xfs_buf *, size_t);
index 65b32acfa0f6070020f5aec660bc3bba38d64bdd..7dcaec54a20bc368613d96ed9c76ace1dd431d53 100644 (file)
@@ -5,19 +5,17 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
-#include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_log.h"
-#include "xfs_inode.h"
 
 
 kmem_zone_t    *xfs_buf_item_zone;
@@ -520,7 +518,7 @@ xfs_buf_item_push(
        /* has a previous flush failed due to IO errors? */
        if ((bp->b_flags & XBF_WRITE_FAIL) &&
            ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
-               xfs_warn(bp->b_target->bt_mount,
+               xfs_warn(bp->b_mount,
 "Failing async write on buffer block 0x%llx. Retrying async write.",
                         (long long)bp->b_bn);
        }
@@ -594,7 +592,7 @@ xfs_buf_item_put(
  * free the item.
  */
 STATIC void
-xfs_buf_item_unlock(
+xfs_buf_item_release(
        struct xfs_log_item     *lip)
 {
        struct xfs_buf_log_item *bip = BUF_ITEM(lip);
@@ -609,7 +607,7 @@ xfs_buf_item_unlock(
                                                   &lip->li_flags);
 #endif
 
-       trace_xfs_buf_item_unlock(bip);
+       trace_xfs_buf_item_release(bip);
 
        /*
         * The bli dirty state should match whether the blf has logged segments
@@ -639,6 +637,14 @@ xfs_buf_item_unlock(
        xfs_buf_relse(bp);
 }
 
+STATIC void
+xfs_buf_item_committing(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               commit_lsn)
+{
+       return xfs_buf_item_release(lip);
+}
+
 /*
  * This is called to find out where the oldest active copy of the
  * buf log item in the on disk log resides now that the last log
@@ -671,25 +677,15 @@ xfs_buf_item_committed(
        return lsn;
 }
 
-STATIC void
-xfs_buf_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               commit_lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all buf log items.
- */
 static const struct xfs_item_ops xfs_buf_item_ops = {
        .iop_size       = xfs_buf_item_size,
        .iop_format     = xfs_buf_item_format,
        .iop_pin        = xfs_buf_item_pin,
        .iop_unpin      = xfs_buf_item_unpin,
-       .iop_unlock     = xfs_buf_item_unlock,
+       .iop_release    = xfs_buf_item_release,
+       .iop_committing = xfs_buf_item_committing,
        .iop_committed  = xfs_buf_item_committed,
        .iop_push       = xfs_buf_item_push,
-       .iop_committing = xfs_buf_item_committing
 };
 
 STATIC int
@@ -743,7 +739,7 @@ xfs_buf_item_init(
         * this buffer. If we do already have one, there is
         * nothing to do here so return.
         */
-       ASSERT(bp->b_target->bt_mount == mp);
+       ASSERT(bp->b_mount == mp);
        if (bip) {
                ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
                ASSERT(!bp->b_transp);
@@ -980,9 +976,9 @@ xfs_buf_item_relse(
  */
 void
 xfs_buf_attach_iodone(
-       xfs_buf_t       *bp,
-       void            (*cb)(xfs_buf_t *, xfs_log_item_t *),
-       xfs_log_item_t  *lip)
+       struct xfs_buf          *bp,
+       void                    (*cb)(struct xfs_buf *, struct xfs_log_item *),
+       struct xfs_log_item     *lip)
 {
        ASSERT(xfs_buf_islocked(bp));
 
index 90f65f891fabd27210e52a2c9085c677d12fde62..4a054b11011a076c81357a0281e5fa2156b2bf5c 100644 (file)
@@ -39,7 +39,7 @@ struct xfs_buf_log_item;
  * locked, and which 128 byte chunks of the buffer are dirty.
  */
 struct xfs_buf_log_item {
-       xfs_log_item_t          bli_item;       /* common item structure */
+       struct xfs_log_item     bli_item;       /* common item structure */
        struct xfs_buf          *bli_buf;       /* real buffer pointer */
        unsigned int            bli_flags;      /* misc flags */
        unsigned int            bli_recur;      /* lock recursion count */
@@ -55,8 +55,8 @@ bool  xfs_buf_item_put(struct xfs_buf_log_item *);
 void   xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
 bool   xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
 void   xfs_buf_attach_iodone(struct xfs_buf *,
-                             void(*)(struct xfs_buf *, xfs_log_item_t *),
-                             xfs_log_item_t *);
+                             void(*)(struct xfs_buf *, struct xfs_log_item *),
+                             struct xfs_log_item *);
 void   xfs_buf_iodone_callbacks(struct xfs_buf *);
 void   xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
 bool   xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
index 5142e64e2345897b8a770f024dc594a10c009446..283df898dd9f6f5c02701dc99bda8f7d9f3b0599 100644 (file)
@@ -6,17 +6,14 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
-#include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_bmap.h"
 #include "xfs_trans.h"
index d0df0ed50f4b6733d6bdd3b21a62c49402a94476..8ec7aab89044019c846f0082be199bf08f1bbd48 100644 (file)
@@ -4,19 +4,17 @@
  * All Rights Reserved.
  */
 #include "xfs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
 #include "xfs_extent_busy.h"
-#include "xfs_discard.h"
 #include "xfs_trace.h"
 #include "xfs_log.h"
 
index a1af984e4913e94e88b0eac261c5479728d8b24e..fb1ad448308156a1edb9e44630f7565b48223cd2 100644 (file)
 #include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_alloc.h"
 #include "xfs_quota.h"
-#include "xfs_error.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_space.h"
 #include "xfs_trans_priv.h"
 #include "xfs_qm.h"
-#include "xfs_cksum.h"
 #include "xfs_trace.h"
 #include "xfs_log.h"
 #include "xfs_bmap_btree.h"
@@ -1243,7 +1239,7 @@ xfs_qm_exit(void)
 /*
  * Iterate every dquot of a particular type.  The caller must ensure that the
  * particular quota type is active.  iter_fn can return negative error codes,
- * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating.
+ * or XFS_ITER_ABORT to indicate that it wants to stop iterating.
  */
 int
 xfs_qm_dqiterate(
index 64bd8640f6e81dc6adba863883fb745554db73af..4fe85709d55d245fb65f72c6bd87866c1d5c5ba4 100644 (file)
@@ -34,7 +34,6 @@ typedef struct xfs_dquot {
        uint             dq_flags;      /* various flags (XFS_DQ_*) */
        struct list_head q_lru;         /* global free list of dquots */
        struct xfs_mount*q_mount;       /* filesystem this relates to */
-       struct xfs_trans*q_transp;      /* trans this belongs to currently */
        uint             q_nrefs;       /* # active refs from inodes */
        xfs_daddr_t      q_blkno;       /* blkno of dquot buffer */
        int              q_bufoffset;   /* off of dq in buffer (# dquots) */
index 7dedd17c4813172239c2cef774b6c4ab3c068dd2..282ec5af293e8f161e9dd8d9ced18393a0e7d925 100644 (file)
@@ -5,13 +5,13 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_inode.h"
 #include "xfs_quota.h"
-#include "xfs_error.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
@@ -94,18 +94,6 @@ xfs_qm_dquot_logitem_unpin(
                wake_up(&dqp->q_pinwait);
 }
 
-STATIC xfs_lsn_t
-xfs_qm_dquot_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       /*
-        * We always re-log the entire dquot when it becomes dirty,
-        * so, the latest copy _is_ the only one that matters.
-        */
-       return lsn;
-}
-
 /*
  * This is called to wait for the given dquot to be unpinned.
  * Most of these pin/unpin routines are plagiarized from inode code.
@@ -209,25 +197,14 @@ out_unlock:
        return rval;
 }
 
-/*
- * Unlock the dquot associated with the log item.
- * Clear the fields of the dquot and dquot log item that
- * are specific to the current transaction.  If the
- * hold flags is set, do not unlock the dquot.
- */
 STATIC void
-xfs_qm_dquot_logitem_unlock(
+xfs_qm_dquot_logitem_release(
        struct xfs_log_item     *lip)
 {
        struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
 
        ASSERT(XFS_DQ_IS_LOCKED(dqp));
 
-       /*
-        * Clear the transaction pointer in the dquot
-        */
-       dqp->q_transp = NULL;
-
        /*
         * dquots are never 'held' from getting unlocked at the end of
         * a transaction.  Their locking and unlocking is hidden inside the
@@ -237,30 +214,22 @@ xfs_qm_dquot_logitem_unlock(
        xfs_dqunlock(dqp);
 }
 
-/*
- * this needs to stamp an lsn into the dquot, I think.
- * rpc's that look at user dquot's would then have to
- * push on the dependency recorded in the dquot
- */
 STATIC void
 xfs_qm_dquot_logitem_committing(
        struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+       xfs_lsn_t               commit_lsn)
 {
+       return xfs_qm_dquot_logitem_release(lip);
 }
 
-/*
- * This is the ops vector for dquots
- */
 static const struct xfs_item_ops xfs_dquot_item_ops = {
        .iop_size       = xfs_qm_dquot_logitem_size,
        .iop_format     = xfs_qm_dquot_logitem_format,
        .iop_pin        = xfs_qm_dquot_logitem_pin,
        .iop_unpin      = xfs_qm_dquot_logitem_unpin,
-       .iop_unlock     = xfs_qm_dquot_logitem_unlock,
-       .iop_committed  = xfs_qm_dquot_logitem_committed,
+       .iop_release    = xfs_qm_dquot_logitem_release,
+       .iop_committing = xfs_qm_dquot_logitem_committing,
        .iop_push       = xfs_qm_dquot_logitem_push,
-       .iop_committing = xfs_qm_dquot_logitem_committing,
        .iop_error      = xfs_dquot_item_error
 };
 
@@ -319,26 +288,6 @@ xfs_qm_qoff_logitem_format(
        xlog_finish_iovec(lv, vecp, sizeof(struct xfs_qoff_logitem));
 }
 
-/*
- * Pinning has no meaning for an quotaoff item, so just return.
- */
-STATIC void
-xfs_qm_qoff_logitem_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
-/*
- * Since pinning has no meaning for an quotaoff item, unpinning does
- * not either.
- */
-STATIC void
-xfs_qm_qoff_logitem_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
-{
-}
-
 /*
  * There isn't much you can do to push a quotaoff item.  It is simply
  * stuck waiting for the log to be flushed to disk.
@@ -351,28 +300,6 @@ xfs_qm_qoff_logitem_push(
        return XFS_ITEM_LOCKED;
 }
 
-/*
- * Quotaoff items have no locking or pushing, so return failure
- * so that the caller doesn't bother with us.
- */
-STATIC void
-xfs_qm_qoff_logitem_unlock(
-       struct xfs_log_item     *lip)
-{
-}
-
-/*
- * The quotaoff-start-item is logged only once and cannot be moved in the log,
- * so simply return the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
-}
-
 STATIC xfs_lsn_t
 xfs_qm_qoffend_logitem_committed(
        struct xfs_log_item     *lip,
@@ -396,50 +323,17 @@ xfs_qm_qoffend_logitem_committed(
        return (xfs_lsn_t)-1;
 }
 
-/*
- * XXX rcc - don't know quite what to do with this.  I think we can
- * just ignore it.  The only time that isn't the case is if we allow
- * the client to somehow see that quotas have been turned off in which
- * we can't allow that to get back until the quotaoff hits the disk.
- * So how would that happen?  Also, do we need different routines for
- * quotaoff start and quotaoff end?  I suspect the answer is yes but
- * to be sure, I need to look at the recovery code and see how quota off
- * recovery is handled (do we roll forward or back or do something else).
- * If we roll forwards or backwards, then we need two separate routines,
- * one that does nothing and one that stamps in the lsn that matters
- * (truly makes the quotaoff irrevocable).  If we do something else,
- * then maybe we don't need two.
- */
-STATIC void
-xfs_qm_qoff_logitem_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               commit_lsn)
-{
-}
-
 static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
        .iop_size       = xfs_qm_qoff_logitem_size,
        .iop_format     = xfs_qm_qoff_logitem_format,
-       .iop_pin        = xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
-       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
        .iop_committed  = xfs_qm_qoffend_logitem_committed,
        .iop_push       = xfs_qm_qoff_logitem_push,
-       .iop_committing = xfs_qm_qoff_logitem_committing
 };
 
-/*
- * This is the ops vector shared by all quotaoff-start log items.
- */
 static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
        .iop_size       = xfs_qm_qoff_logitem_size,
        .iop_format     = xfs_qm_qoff_logitem_format,
-       .iop_pin        = xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
-       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
-       .iop_committed  = xfs_qm_qoff_logitem_committed,
        .iop_push       = xfs_qm_qoff_logitem_push,
-       .iop_committing = xfs_qm_qoff_logitem_committing
 };
 
 /*
index db9df710a3080c43a964c04d3869a1a208209916..1aed34ccdabc21f9221680c21eacdca4ab6f8d20 100644 (file)
@@ -12,13 +12,13 @@ struct xfs_mount;
 struct xfs_qoff_logitem;
 
 typedef struct xfs_dq_logitem {
-       xfs_log_item_t           qli_item;         /* common portion */
+       struct xfs_log_item      qli_item;         /* common portion */
        struct xfs_dquot        *qli_dquot;        /* dquot ptr */
        xfs_lsn_t                qli_flush_lsn;    /* lsn at last flush */
 } xfs_dq_logitem_t;
 
 typedef struct xfs_qoff_logitem {
-       xfs_log_item_t           qql_item;      /* common portion */
+       struct xfs_log_item      qql_item;      /* common portion */
        struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
        unsigned int            qql_flags;
 } xfs_qoff_logitem_t;
index a1e177f66404d28184fd99a5711d2c6f0b5005c2..544c9482a0efec22883f168e670c1320ab06eac7 100644 (file)
@@ -4,6 +4,7 @@
  * All Rights Reserved.
  */
 #include "xfs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_fs.h"
 #include "xfs_log_format.h"
@@ -353,7 +354,7 @@ xfs_buf_verifier_error(
        size_t                  bufsz,
        xfs_failaddr_t          failaddr)
 {
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_mount        *mp = bp->b_mount;
        xfs_failaddr_t          fa;
        int                     sz;
 
index f2284ceb129f7acdf91874934c3013c6d9b174f3..f1372f9046e389313afa3101a9072d0344e31818 100644 (file)
@@ -4,18 +4,16 @@
  * All Rights Reserved.
  */
 #include "xfs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir2.h"
 #include "xfs_export.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_inode_item.h"
-#include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_log.h"
 #include "xfs_pnfs.h"
index 74ddf66f4cfe463264cdcd2d7ea1dd9fb11bb8f9..86f6512d68643ec833b1aa5c5ef4e12036e90523 100644 (file)
@@ -9,14 +9,18 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
+#include "xfs_shared.h"
 #include "xfs_mount.h"
+#include "xfs_defer.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
 #include "xfs_extfree_item.h"
 #include "xfs_log.h"
 #include "xfs_btree.h"
 #include "xfs_rmap.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_trace.h"
 
 
 kmem_zone_t    *xfs_efi_zone;
@@ -106,15 +110,6 @@ xfs_efi_item_format(
 }
 
 
-/*
- * Pinning has no meaning for an efi item, so just return.
- */
-STATIC void
-xfs_efi_item_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
 /*
  * The unpin operation is the last place an EFI is manipulated in the log. It is
  * either inserted in the AIL or aborted in the event of a log I/O error. In
@@ -132,72 +127,23 @@ xfs_efi_item_unpin(
        xfs_efi_release(efip);
 }
 
-/*
- * Efi items have no locking or pushing.  However, since EFIs are pulled from
- * the AIL when their corresponding EFDs are committed to disk, their situation
- * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log.  This should help in getting the EFI out of
- * the AIL.
- */
-STATIC uint
-xfs_efi_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
-{
-       return XFS_ITEM_PINNED;
-}
-
 /*
  * The EFI has been either committed or aborted if the transaction has been
  * cancelled. If the transaction was cancelled, an EFD isn't going to be
  * constructed and thus we free the EFI here directly.
  */
 STATIC void
-xfs_efi_item_unlock(
+xfs_efi_item_release(
        struct xfs_log_item     *lip)
 {
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
-               xfs_efi_release(EFI_ITEM(lip));
-}
-
-/*
- * The EFI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_efi_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
-}
-
-/*
- * The EFI dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_efi_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
+       xfs_efi_release(EFI_ITEM(lip));
 }
 
-/*
- * This is the ops vector shared by all efi log items.
- */
 static const struct xfs_item_ops xfs_efi_item_ops = {
        .iop_size       = xfs_efi_item_size,
        .iop_format     = xfs_efi_item_format,
-       .iop_pin        = xfs_efi_item_pin,
        .iop_unpin      = xfs_efi_item_unpin,
-       .iop_unlock     = xfs_efi_item_unlock,
-       .iop_committed  = xfs_efi_item_committed,
-       .iop_push       = xfs_efi_item_push,
-       .iop_committing = xfs_efi_item_committing
+       .iop_release    = xfs_efi_item_release,
 };
 
 
@@ -349,136 +295,298 @@ xfs_efd_item_format(
 }
 
 /*
- * Pinning has no meaning for an efd item, so just return.
+ * The EFD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the EFI and free the EFD.
  */
 STATIC void
-xfs_efd_item_pin(
+xfs_efd_item_release(
        struct xfs_log_item     *lip)
 {
+       struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+
+       xfs_efi_release(efdp->efd_efip);
+       xfs_efd_item_free(efdp);
 }
 
+static const struct xfs_item_ops xfs_efd_item_ops = {
+       .flags          = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+       .iop_size       = xfs_efd_item_size,
+       .iop_format     = xfs_efd_item_format,
+       .iop_release    = xfs_efd_item_release,
+};
+
 /*
- * Since pinning has no meaning for an efd item, unpinning does
- * not either.
+ * Allocate an "extent free done" log item that will hold nextents worth of
+ * extents.  The caller must use all nextents extents, because we are not
+ * flexible about this at all.
  */
-STATIC void
-xfs_efd_item_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
+static struct xfs_efd_log_item *
+xfs_trans_get_efd(
+       struct xfs_trans                *tp,
+       struct xfs_efi_log_item         *efip,
+       unsigned int                    nextents)
 {
+       struct xfs_efd_log_item         *efdp;
+
+       ASSERT(nextents > 0);
+
+       if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
+               efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) +
+                               (nextents - 1) * sizeof(struct xfs_extent),
+                               KM_SLEEP);
+       } else {
+               efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
+       }
+
+       xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD,
+                         &xfs_efd_item_ops);
+       efdp->efd_efip = efip;
+       efdp->efd_format.efd_nextents = nextents;
+       efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
+
+       xfs_trans_add_item(tp, &efdp->efd_item);
+       return efdp;
 }
 
 /*
- * There isn't much you can do to push on an efd item.  It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Free an extent and log it to the EFD. Note that the transaction is marked
+ * dirty regardless of whether the extent free succeeds or fails, in order to
+ * support the EFI/EFD lifecycle rules.
  */
-STATIC uint
-xfs_efd_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
+static int
+xfs_trans_free_extent(
+       struct xfs_trans                *tp,
+       struct xfs_efd_log_item         *efdp,
+       xfs_fsblock_t                   start_block,
+       xfs_extlen_t                    ext_len,
+       const struct xfs_owner_info     *oinfo,
+       bool                            skip_discard)
 {
-       return XFS_ITEM_PINNED;
+       struct xfs_mount                *mp = tp->t_mountp;
+       struct xfs_extent               *extp;
+       uint                            next_extent;
+       xfs_agnumber_t                  agno = XFS_FSB_TO_AGNO(mp, start_block);
+       xfs_agblock_t                   agbno = XFS_FSB_TO_AGBNO(mp,
+                                                               start_block);
+       int                             error;
+
+       trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
+
+       error = __xfs_free_extent(tp, start_block, ext_len,
+                                 oinfo, XFS_AG_RESV_NONE, skip_discard);
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the EFI and frees the EFD
+        * 2.) shuts down the filesystem
+        */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
+
+       next_extent = efdp->efd_next_extent;
+       ASSERT(next_extent < efdp->efd_format.efd_nextents);
+       extp = &(efdp->efd_format.efd_extents[next_extent]);
+       extp->ext_start = start_block;
+       extp->ext_len = ext_len;
+       efdp->efd_next_extent++;
+
+       return error;
 }
 
-/*
- * The EFD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the EFI and free the EFD.
- */
-STATIC void
-xfs_efd_item_unlock(
-       struct xfs_log_item     *lip)
+/* Sort extent free items by AG. */
+static int
+xfs_extent_free_diff_items(
+       void                            *priv,
+       struct list_head                *a,
+       struct list_head                *b)
 {
-       struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+       struct xfs_mount                *mp = priv;
+       struct xfs_extent_free_item     *ra;
+       struct xfs_extent_free_item     *rb;
+
+       ra = container_of(a, struct xfs_extent_free_item, xefi_list);
+       rb = container_of(b, struct xfs_extent_free_item, xefi_list);
+       return  XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
+               XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
+}
 
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
-               xfs_efi_release(efdp->efd_efip);
-               xfs_efd_item_free(efdp);
-       }
+/* Get an EFI. */
+STATIC void *
+xfs_extent_free_create_intent(
+       struct xfs_trans                *tp,
+       unsigned int                    count)
+{
+       struct xfs_efi_log_item         *efip;
+
+       ASSERT(tp != NULL);
+       ASSERT(count > 0);
+
+       efip = xfs_efi_init(tp->t_mountp, count);
+       ASSERT(efip != NULL);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &efip->efi_item);
+       return efip;
 }
 
-/*
- * When the efd item is committed to disk, all we need to do is delete our
- * reference to our partner efi item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from further
- * referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_efd_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+/* Log a free extent to the intent item. */
+STATIC void
+xfs_extent_free_log_item(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       struct list_head                *item)
 {
-       struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+       struct xfs_efi_log_item         *efip = intent;
+       struct xfs_extent_free_item     *free;
+       uint                            next_extent;
+       struct xfs_extent               *extp;
+
+       free = container_of(item, struct xfs_extent_free_item, xefi_list);
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
 
        /*
-        * Drop the EFI reference regardless of whether the EFD has been
-        * aborted. Once the EFD transaction is constructed, it is the sole
-        * responsibility of the EFD to release the EFI (even if the EFI is
-        * aborted due to log I/O error).
+        * atomic_inc_return gives us the value after the increment;
+        * we want to use it as an array index so we need to subtract 1 from
+        * it.
         */
-       xfs_efi_release(efdp->efd_efip);
-       xfs_efd_item_free(efdp);
+       next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
+       ASSERT(next_extent < efip->efi_format.efi_nextents);
+       extp = &efip->efi_format.efi_extents[next_extent];
+       extp->ext_start = free->xefi_startblock;
+       extp->ext_len = free->xefi_blockcount;
+}
 
-       return (xfs_lsn_t)-1;
+/* Get an EFD so we can process all the free extents. */
+STATIC void *
+xfs_extent_free_create_done(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       unsigned int                    count)
+{
+       return xfs_trans_get_efd(tp, intent, count);
 }
 
-/*
- * The EFD dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Process a free extent. */
+STATIC int
+xfs_extent_free_finish_item(
+       struct xfs_trans                *tp,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
+{
+       struct xfs_extent_free_item     *free;
+       int                             error;
+
+       free = container_of(item, struct xfs_extent_free_item, xefi_list);
+       error = xfs_trans_free_extent(tp, done_item,
+                       free->xefi_startblock,
+                       free->xefi_blockcount,
+                       &free->xefi_oinfo, free->xefi_skip_discard);
+       kmem_free(free);
+       return error;
+}
+
+/* Abort a pending EFI by releasing the intent item. */
 STATIC void
-xfs_efd_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+xfs_extent_free_abort_intent(
+       void                            *intent)
 {
+       xfs_efi_release(intent);
 }
 
-/*
- * This is the ops vector shared by all efd log items.
- */
-static const struct xfs_item_ops xfs_efd_item_ops = {
-       .iop_size       = xfs_efd_item_size,
-       .iop_format     = xfs_efd_item_format,
-       .iop_pin        = xfs_efd_item_pin,
-       .iop_unpin      = xfs_efd_item_unpin,
-       .iop_unlock     = xfs_efd_item_unlock,
-       .iop_committed  = xfs_efd_item_committed,
-       .iop_push       = xfs_efd_item_push,
-       .iop_committing = xfs_efd_item_committing
+/* Cancel a free extent. */
+STATIC void
+xfs_extent_free_cancel_item(
+       struct list_head                *item)
+{
+       struct xfs_extent_free_item     *free;
+
+       free = container_of(item, struct xfs_extent_free_item, xefi_list);
+       kmem_free(free);
+}
+
+const struct xfs_defer_op_type xfs_extent_free_defer_type = {
+       .max_items      = XFS_EFI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_extent_free_diff_items,
+       .create_intent  = xfs_extent_free_create_intent,
+       .abort_intent   = xfs_extent_free_abort_intent,
+       .log_item       = xfs_extent_free_log_item,
+       .create_done    = xfs_extent_free_create_done,
+       .finish_item    = xfs_extent_free_finish_item,
+       .cancel_item    = xfs_extent_free_cancel_item,
 };
 
 /*
- * Allocate and initialize an efd item with the given number of extents.
+ * AGFL blocks are accounted differently in the reserve pools and are not
+ * inserted into the busy extent list.
  */
-struct xfs_efd_log_item *
-xfs_efd_init(
-       struct xfs_mount        *mp,
-       struct xfs_efi_log_item *efip,
-       uint                    nextents)
-
+STATIC int
+xfs_agfl_free_finish_item(
+       struct xfs_trans                *tp,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
 {
-       struct xfs_efd_log_item *efdp;
-       uint                    size;
+       struct xfs_mount                *mp = tp->t_mountp;
+       struct xfs_efd_log_item         *efdp = done_item;
+       struct xfs_extent_free_item     *free;
+       struct xfs_extent               *extp;
+       struct xfs_buf                  *agbp;
+       int                             error;
+       xfs_agnumber_t                  agno;
+       xfs_agblock_t                   agbno;
+       uint                            next_extent;
+
+       free = container_of(item, struct xfs_extent_free_item, xefi_list);
+       ASSERT(free->xefi_blockcount == 1);
+       agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
+       agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
+
+       trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
+
+       error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+       if (!error)
+               error = xfs_free_agfl_block(tp, agno, agbno, agbp,
+                                           &free->xefi_oinfo);
 
-       ASSERT(nextents > 0);
-       if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
-               size = (uint)(sizeof(xfs_efd_log_item_t) +
-                       ((nextents - 1) * sizeof(xfs_extent_t)));
-               efdp = kmem_zalloc(size, KM_SLEEP);
-       } else {
-               efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
-       }
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the EFI and frees the EFD
+        * 2.) shuts down the filesystem
+        */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
 
-       xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops);
-       efdp->efd_efip = efip;
-       efdp->efd_format.efd_nextents = nextents;
-       efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
+       next_extent = efdp->efd_next_extent;
+       ASSERT(next_extent < efdp->efd_format.efd_nextents);
+       extp = &(efdp->efd_format.efd_extents[next_extent]);
+       extp->ext_start = free->xefi_startblock;
+       extp->ext_len = free->xefi_blockcount;
+       efdp->efd_next_extent++;
 
-       return efdp;
+       kmem_free(free);
+       return error;
 }
 
+/* sub-type with special handling for AGFL deferred frees */
+const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
+       .max_items      = XFS_EFI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_extent_free_diff_items,
+       .create_intent  = xfs_extent_free_create_intent,
+       .abort_intent   = xfs_extent_free_abort_intent,
+       .log_item       = xfs_extent_free_log_item,
+       .create_done    = xfs_extent_free_create_done,
+       .finish_item    = xfs_agfl_free_finish_item,
+       .cancel_item    = xfs_extent_free_cancel_item,
+};
+
 /*
  * Process an extent free intent item that was recovered from
  * the log.  We need to free the extents that it describes.
index 2a6a895ca73e542c571cb47d3ac7b86ccfe7511d..16aaab06d4ecc55afa09f1d7880ef41a225ad82b 100644 (file)
@@ -51,7 +51,7 @@ struct kmem_zone;
  * AIL, so at this point both the EFI and EFD are freed.
  */
 typedef struct xfs_efi_log_item {
-       xfs_log_item_t          efi_item;
+       struct xfs_log_item     efi_item;
        atomic_t                efi_refcount;
        atomic_t                efi_next_extent;
        unsigned long           efi_flags;      /* misc flags */
@@ -64,7 +64,7 @@ typedef struct xfs_efi_log_item {
  * have been freed.
  */
 typedef struct xfs_efd_log_item {
-       xfs_log_item_t          efd_item;
+       struct xfs_log_item     efd_item;
        xfs_efi_log_item_t      *efd_efip;
        uint                    efd_next_extent;
        xfs_efd_log_format_t    efd_format;
@@ -79,8 +79,6 @@ extern struct kmem_zone       *xfs_efi_zone;
 extern struct kmem_zone        *xfs_efd_zone;
 
 xfs_efi_log_item_t     *xfs_efi_init(struct xfs_mount *, uint);
-xfs_efd_log_item_t     *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
-                                     uint);
 int                    xfs_efi_copy_format(xfs_log_iovec_t *buf,
                                            xfs_efi_log_format_t *dst_efi_fmt);
 void                   xfs_efi_item_free(xfs_efi_log_item_t *);
index 916a35cae5e94d649d6d8d181647ab870ce558d9..e93bacbd49aed89946e22effc1c47b3026ec865b 100644 (file)
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_inode_item.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
-#include "xfs_error.h"
 #include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_ioctl.h"
@@ -28,9 +25,7 @@
 #include "xfs_iomap.h"
 #include "xfs_reflink.h"
 
-#include <linux/dcache.h>
 #include <linux/falloc.h>
-#include <linux/pagevec.h>
 #include <linux/backing-dev.h>
 #include <linux/mman.h>
 
@@ -379,6 +374,7 @@ xfs_dio_write_end_io(
        struct inode            *inode = file_inode(iocb->ki_filp);
        struct xfs_inode        *ip = XFS_I(inode);
        loff_t                  offset = iocb->ki_pos;
+       unsigned int            nofs_flag;
        int                     error = 0;
 
        trace_xfs_end_io_direct_write(ip, offset, size);
@@ -395,10 +391,17 @@ xfs_dio_write_end_io(
         */
        XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
 
+       /*
+        * We can allocate memory here while doing writeback on behalf of
+        * memory reclaim.  To avoid memory allocation deadlocks set the
+        * task-wide nofs context for the following operations.
+        */
+       nofs_flag = memalloc_nofs_save();
+
        if (flags & IOMAP_DIO_COW) {
                error = xfs_reflink_end_cow(ip, offset, size);
                if (error)
-                       return error;
+                       goto out;
        }
 
        /*
@@ -407,8 +410,10 @@ xfs_dio_write_end_io(
         * earlier allows a racing dio read to find unwritten extents before
         * they are converted.
         */
-       if (flags & IOMAP_DIO_UNWRITTEN)
-               return xfs_iomap_write_unwritten(ip, offset, size, true);
+       if (flags & IOMAP_DIO_UNWRITTEN) {
+               error = xfs_iomap_write_unwritten(ip, offset, size, true);
+               goto out;
+       }
 
        /*
         * We need to update the in-core inode size here so that we don't end up
@@ -430,6 +435,8 @@ xfs_dio_write_end_io(
                spin_unlock(&ip->i_flags_lock);
        }
 
+out:
+       memalloc_nofs_restore(nofs_flag);
        return error;
 }
 
index 182501373af2dc429637b936c99542c5a2d90987..574a7a8b4736ba49b297478c0712f16cc8995c42 100644 (file)
@@ -5,22 +5,19 @@
  * All Rights Reserved.
  */
 #include "xfs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
 #include "xfs_alloc.h"
 #include "xfs_mru_cache.h"
-#include "xfs_filestream.h"
 #include "xfs_trace.h"
 #include "xfs_ag_resv.h"
 #include "xfs_trans.h"
-#include "xfs_shared.h"
 
 struct xfs_fstrm_item {
        struct xfs_mru_cache_elem       mru;
index 3d76a9e35870adad0a29b2a04e5ab4c8adf02dc9..5a8f9641562aa12903dfbe321846bb770a85a0c1 100644 (file)
@@ -9,16 +9,12 @@
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_error.h"
 #include "xfs_btree.h"
 #include "xfs_rmap_btree.h"
 #include "xfs_trace.h"
-#include "xfs_log.h"
 #include "xfs_rmap.h"
 #include "xfs_alloc.h"
 #include "xfs_bit.h"
index 3d0e0570e3aa1b00bbdd5f9655a9dce8819ace6d..3e61d0cc23f8c6a2b2200a8d4fee04d627cc39fc 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_trans.h"
 #include "xfs_error.h"
-#include "xfs_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_fsops.h"
 #include "xfs_trans_space.h"
-#include "xfs_rtalloc.h"
-#include "xfs_trace.h"
 #include "xfs_log.h"
 #include "xfs_ag.h"
 #include "xfs_ag_resv.h"
@@ -251,9 +247,9 @@ xfs_growfs_data(
        if (mp->m_sb.sb_imax_pct) {
                uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
                do_div(icount, 100);
-               mp->m_maxicount = XFS_FSB_TO_INO(mp, icount);
+               M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount);
        } else
-               mp->m_maxicount = 0;
+               M_IGEO(mp)->maxicount = 0;
 
        /* Update secondary superblocks now the physical grow has completed */
        error = xfs_update_secondary_sbs(mp);
index d0d37738412009355957661fe7e47a18c7522de4..fa55ab8b8d80ef7a6b93e70f82f6a38f449b1ab1 100644 (file)
@@ -4,7 +4,6 @@
  * All Rights Reserved.
  */
 #include "xfs.h"
-#include "xfs_sysctl.h"
 
 /*
  * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
@@ -41,4 +40,7 @@ struct xfs_globals xfs_globals = {
 #else
        .bug_on_assert          =       false,  /* assert failures WARN() */
 #endif
+#ifdef DEBUG
+       .pwork_threads          =       -1,     /* automatic thread detection */
+#endif
 };
index 4c4929f9e7bf382a537daea2586a11d429fa707e..8e0cb05a71424e557e065bf6a5ecfca4962bf041 100644 (file)
@@ -9,12 +9,8 @@
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_trace.h"
 #include "xfs_health.h"
@@ -373,7 +369,7 @@ static const struct ioctl_sick_map ino_map[] = {
 void
 xfs_bulkstat_health(
        struct xfs_inode                *ip,
-       struct xfs_bstat                *bs)
+       struct xfs_bulkstat             *bs)
 {
        const struct ioctl_sick_map     *m;
        unsigned int                    sick;
index a76b27565a1898f5a7572e2e871da4011304084b..0b0fd10a36d4da80870e3d3734c6908acf721fa5 100644 (file)
@@ -5,13 +5,13 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_inode.h"
-#include "xfs_error.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_inode_item.h"
@@ -23,8 +23,6 @@
 #include "xfs_dquot.h"
 #include "xfs_reflink.h"
 
-#include <linux/kthread.h>
-#include <linux/freezer.h>
 #include <linux/iversion.h>
 
 /*
index 8381d34cb102f8c6f7541d80173c98d456b7e622..d99a0a3e5f400e767ff6f76296f4596bff6f7f58 100644 (file)
@@ -6,14 +6,9 @@
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_shared.h"
-#include "xfs_format.h"
 #include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_mount.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
-#include "xfs_error.h"
 #include "xfs_icreate_item.h"
 #include "xfs_log.h"
 
@@ -56,80 +51,18 @@ xfs_icreate_item_format(
                        sizeof(struct xfs_icreate_log));
 }
 
-
-/* Pinning has no meaning for the create item, so just return. */
 STATIC void
-xfs_icreate_item_pin(
+xfs_icreate_item_release(
        struct xfs_log_item     *lip)
 {
+       kmem_zone_free(xfs_icreate_zone, ICR_ITEM(lip));
 }
 
-
-/* pinning has no meaning for the create item, so just return. */
-STATIC void
-xfs_icreate_item_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
-{
-}
-
-STATIC void
-xfs_icreate_item_unlock(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_icreate_item *icp = ICR_ITEM(lip);
-
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
-               kmem_zone_free(xfs_icreate_zone, icp);
-       return;
-}
-
-/*
- * Because we have ordered buffers being tracked in the AIL for the inode
- * creation, we don't need the create item after this. Hence we can free
- * the log item and return -1 to tell the caller we're done with the item.
- */
-STATIC xfs_lsn_t
-xfs_icreate_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       struct xfs_icreate_item *icp = ICR_ITEM(lip);
-
-       kmem_zone_free(xfs_icreate_zone, icp);
-       return (xfs_lsn_t)-1;
-}
-
-/* item can never get into the AIL */
-STATIC uint
-xfs_icreate_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
-{
-       ASSERT(0);
-       return XFS_ITEM_SUCCESS;
-}
-
-/* Ordered buffers do the dependency tracking here, so this does nothing. */
-STATIC void
-xfs_icreate_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all buf log items.
- */
 static const struct xfs_item_ops xfs_icreate_item_ops = {
+       .flags          = XFS_ITEM_RELEASE_WHEN_COMMITTED,
        .iop_size       = xfs_icreate_item_size,
        .iop_format     = xfs_icreate_item_format,
-       .iop_pin        = xfs_icreate_item_pin,
-       .iop_unpin      = xfs_icreate_item_unpin,
-       .iop_push       = xfs_icreate_item_push,
-       .iop_unlock     = xfs_icreate_item_unlock,
-       .iop_committed  = xfs_icreate_item_committed,
-       .iop_committing = xfs_icreate_item_committing,
+       .iop_release    = xfs_icreate_item_release,
 };
 
 
index 71d216cf6f875e01516f15cafa673e0b3e0dbb78..6467d5e1df2dd1508f39aa9f3dc6df6ad37c2e5b 100644 (file)
@@ -3,7 +3,6 @@
  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
  * All Rights Reserved.
  */
-#include <linux/log2.h>
 #include <linux/iversion.h>
 
 #include "xfs.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_inode.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir2.h"
-#include "xfs_attr_sf.h"
 #include "xfs_attr.h"
 #include "xfs_trans_space.h"
 #include "xfs_trans.h"
@@ -32,7 +28,6 @@
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_filestream.h"
-#include "xfs_cksum.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_symlink.h"
@@ -40,7 +35,6 @@
 #include "xfs_log.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_reflink.h"
-#include "xfs_dir2_priv.h"
 
 kmem_zone_t *xfs_inode_zone;
 
@@ -441,12 +435,12 @@ xfs_lock_inumorder(int lock_mode, int subclass)
  */
 static void
 xfs_lock_inodes(
-       xfs_inode_t     **ips,
-       int             inodes,
-       uint            lock_mode)
+       struct xfs_inode        **ips,
+       int                     inodes,
+       uint                    lock_mode)
 {
-       int             attempts = 0, i, j, try_lock;
-       xfs_log_item_t  *lp;
+       int                     attempts = 0, i, j, try_lock;
+       struct xfs_log_item     *lp;
 
        /*
         * Currently supports between 2 and 5 inodes with exclusive locking.  We
@@ -485,7 +479,7 @@ again:
                 */
                if (!try_lock) {
                        for (j = (i - 1); j >= 0 && !try_lock; j--) {
-                               lp = (xfs_log_item_t *)ips[j]->i_itemp;
+                               lp = &ips[j]->i_itemp->ili_item;
                                if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
                                        try_lock++;
                        }
@@ -551,7 +545,7 @@ xfs_lock_two_inodes(
        struct xfs_inode        *temp;
        uint                    mode_temp;
        int                     attempts = 0;
-       xfs_log_item_t          *lp;
+       struct xfs_log_item     *lp;
 
        ASSERT(hweight32(ip0_mode) == 1);
        ASSERT(hweight32(ip1_mode) == 1);
@@ -585,7 +579,7 @@ xfs_lock_two_inodes(
         * the second lock. If we can't get it, we must release the first one
         * and try again.
         */
-       lp = (xfs_log_item_t *)ip0->i_itemp;
+       lp = &ip0->i_itemp->ili_item;
        if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
                if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
                        xfs_iunlock(ip0, ip0_mode);
@@ -2537,13 +2531,14 @@ xfs_ifree_cluster(
        xfs_inode_log_item_t    *iip;
        struct xfs_log_item     *lip;
        struct xfs_perag        *pag;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
        xfs_ino_t               inum;
 
        inum = xic->first_ino;
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
-       nbufs = mp->m_ialloc_blks / mp->m_blocks_per_cluster;
+       nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
 
-       for (j = 0; j < nbufs; j++, inum += mp->m_inodes_per_cluster) {
+       for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
                /*
                 * The allocation bitmap tells us which inodes of the chunk were
                 * physically allocated. Skip the cluster if an inode falls into
@@ -2551,7 +2546,7 @@ xfs_ifree_cluster(
                 */
                ioffset = inum - xic->first_ino;
                if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
-                       ASSERT(ioffset % mp->m_inodes_per_cluster == 0);
+                       ASSERT(ioffset % igeo->inodes_per_cluster == 0);
                        continue;
                }
 
@@ -2567,7 +2562,7 @@ xfs_ifree_cluster(
                 * to mark all the active inodes on the buffer stale.
                 */
                bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
-                                       mp->m_bsize * mp->m_blocks_per_cluster,
+                                       mp->m_bsize * igeo->blocks_per_cluster,
                                        XBF_UNMAPPED);
 
                if (!bp)
@@ -2614,7 +2609,7 @@ xfs_ifree_cluster(
                 * transaction stale above, which means there is no point in
                 * even trying to lock them.
                 */
-               for (i = 0; i < mp->m_inodes_per_cluster; i++) {
+               for (i = 0; i < igeo->inodes_per_cluster; i++) {
 retry:
                        rcu_read_lock();
                        ip = radix_tree_lookup(&pag->pag_ici_root,
@@ -3472,28 +3467,27 @@ xfs_iflush_cluster(
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_perag        *pag;
        unsigned long           first_index, mask;
-       unsigned long           inodes_per_cluster;
        int                     cilist_size;
        struct xfs_inode        **cilist;
        struct xfs_inode        *cip;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
        int                     nr_found;
        int                     clcount = 0;
        int                     i;
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
 
-       inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-       cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
+       cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *);
        cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
        if (!cilist)
                goto out_put;
 
-       mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
+       mask = ~(igeo->inodes_per_cluster - 1);
        first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
        rcu_read_lock();
        /* really need a gang lookup range call here */
        nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
-                                       first_index, inodes_per_cluster);
+                                       first_index, igeo->inodes_per_cluster);
        if (nr_found == 0)
                goto out_free;
 
index fa1c4fe2ffbfb1fcda3eaaaff3bd8a3b3b3d9b74..c9a502eed20415fd83907b64c2870c4d079cf24c 100644 (file)
@@ -5,6 +5,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
@@ -12,7 +13,6 @@
 #include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_inode_item.h"
-#include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_trans_priv.h"
 #include "xfs_buf_item.h"
@@ -565,7 +565,7 @@ out_unlock:
  * Unlock the inode associated with the inode log item.
  */
 STATIC void
-xfs_inode_item_unlock(
+xfs_inode_item_release(
        struct xfs_log_item     *lip)
 {
        struct xfs_inode_log_item *iip = INODE_ITEM(lip);
@@ -621,23 +621,21 @@ xfs_inode_item_committed(
 STATIC void
 xfs_inode_item_committing(
        struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+       xfs_lsn_t               commit_lsn)
 {
-       INODE_ITEM(lip)->ili_last_lsn = lsn;
+       INODE_ITEM(lip)->ili_last_lsn = commit_lsn;
+       return xfs_inode_item_release(lip);
 }
 
-/*
- * This is the ops vector shared by all buf log items.
- */
 static const struct xfs_item_ops xfs_inode_item_ops = {
        .iop_size       = xfs_inode_item_size,
        .iop_format     = xfs_inode_item_format,
        .iop_pin        = xfs_inode_item_pin,
        .iop_unpin      = xfs_inode_item_unpin,
-       .iop_unlock     = xfs_inode_item_unlock,
+       .iop_release    = xfs_inode_item_release,
        .iop_committed  = xfs_inode_item_committed,
        .iop_push       = xfs_inode_item_push,
-       .iop_committing = xfs_inode_item_committing,
+       .iop_committing = xfs_inode_item_committing,
        .iop_error      = xfs_inode_item_error
 };
 
index 27081eba220c95247a24a417e00ace8d574e3d44..07a60e74c39c80a07e0c43c793d75025635b22be 100644 (file)
@@ -14,7 +14,7 @@ struct xfs_inode;
 struct xfs_mount;
 
 typedef struct xfs_inode_log_item {
-       xfs_log_item_t          ili_item;          /* common portion */
+       struct xfs_log_item     ili_item;          /* common portion */
        struct xfs_inode        *ili_inode;        /* inode ptr */
        xfs_lsn_t               ili_flush_lsn;     /* lsn at last flush */
        xfs_lsn_t               ili_last_lsn;      /* lsn at last transaction */
index fe29aa61293c562131acc90dc540129484234184..6f7848cd5527bc8d32c840c44d4748a1422357fb 100644 (file)
@@ -11,9 +11,8 @@
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_inode.h"
-#include "xfs_ioctl.h"
-#include "xfs_alloc.h"
 #include "xfs_rtalloc.h"
+#include "xfs_iwalk.h"
 #include "xfs_itable.h"
 #include "xfs_error.h"
 #include "xfs_attr.h"
@@ -25,7 +24,6 @@
 #include "xfs_export.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
-#include "xfs_symlink.h"
 #include "xfs_trans.h"
 #include "xfs_acl.h"
 #include "xfs_btree.h"
 #include "xfs_ag.h"
 #include "xfs_health.h"
 
-#include <linux/capability.h>
-#include <linux/cred.h>
-#include <linux/dcache.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/exportfs.h>
 
 /*
  * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
@@ -721,16 +713,45 @@ out_unlock:
        return error;
 }
 
+/* Return -EFAULT if the copyout fails, else what xfs_ibulk_advance() returns */
+int
+xfs_fsbulkstat_one_fmt(
+       struct xfs_ibulk                *breq,
+       const struct xfs_bulkstat       *bstat)
+{
+       struct xfs_bstat                bs1;
+
+       xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat);
+       if (copy_to_user(breq->ubuffer, &bs1, sizeof(bs1)))
+               return -EFAULT;
+       return xfs_ibulk_advance(breq, sizeof(struct xfs_bstat));
+}
+
+int
+xfs_fsinumbers_fmt(
+       struct xfs_ibulk                *breq,
+       const struct xfs_inumbers       *igrp)
+{
+       struct xfs_inogrp               ig1;
+
+       xfs_inumbers_to_inogrp(&ig1, igrp);
+       if (copy_to_user(breq->ubuffer, &ig1, sizeof(struct xfs_inogrp)))
+               return -EFAULT;
+       return xfs_ibulk_advance(breq, sizeof(struct xfs_inogrp));
+}
+
 STATIC int
-xfs_ioc_bulkstat(
+xfs_ioc_fsbulkstat(
        xfs_mount_t             *mp,
        unsigned int            cmd,
        void                    __user *arg)
 {
-       xfs_fsop_bulkreq_t      bulkreq;
-       int                     count;  /* # of records returned */
-       xfs_ino_t               inlast; /* last inode number */
-       int                     done;
+       struct xfs_fsop_bulkreq bulkreq;
+       struct xfs_ibulk        breq = {
+               .mp             = mp,
+               .ocount         = 0,
+       };
+       xfs_ino_t               lastino;
        int                     error;
 
        /* done = 1 if there are more stats to get and if bulkstat */
@@ -742,41 +763,243 @@ xfs_ioc_bulkstat(
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
-       if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
+       if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq)))
                return -EFAULT;
 
-       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+       if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
                return -EFAULT;
 
-       if ((count = bulkreq.icount) <= 0)
+       if (bulkreq.icount <= 0)
                return -EINVAL;
 
        if (bulkreq.ubuffer == NULL)
                return -EINVAL;
 
-       if (cmd == XFS_IOC_FSINUMBERS)
-               error = xfs_inumbers(mp, &inlast, &count,
-                                       bulkreq.ubuffer, xfs_inumbers_fmt);
-       else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
-               error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer,
-                                       sizeof(xfs_bstat_t), NULL, &done);
-       else    /* XFS_IOC_FSBULKSTAT */
-               error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
-                                    sizeof(xfs_bstat_t), bulkreq.ubuffer,
-                                    &done);
+       breq.ubuffer = bulkreq.ubuffer;
+       breq.icount = bulkreq.icount;
+
+       /*
+        * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
+        * that we want to stat.  However, FSINUMBERS and FSBULKSTAT expect
+        * that *lastip contains either zero or the number of the last inode to
+        * be examined by the previous call and return results starting with
+        * the next inode after that.  The new bulk request back end functions
+        * take the inode to start with, so we have to compute the startino
+        * parameter from lastino to maintain correct function.  lastino == 0
+        * is a special case because it has traditionally meant "first inode
+        * in filesystem".
+        */
+       if (cmd == XFS_IOC_FSINUMBERS) {
+               breq.startino = lastino ? lastino + 1 : 0;
+               error = xfs_inumbers(&breq, xfs_fsinumbers_fmt);
+               lastino = breq.startino - 1;
+       } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) {
+               breq.startino = lastino;
+               breq.icount = 1;
+               error = xfs_bulkstat_one(&breq, xfs_fsbulkstat_one_fmt);
+       } else {        /* XFS_IOC_FSBULKSTAT */
+               breq.startino = lastino ? lastino + 1 : 0;
+               error = xfs_bulkstat(&breq, xfs_fsbulkstat_one_fmt);
+               lastino = breq.startino - 1;
+       }
 
        if (error)
                return error;
 
-       if (bulkreq.ocount != NULL) {
-               if (copy_to_user(bulkreq.lastip, &inlast,
-                                               sizeof(xfs_ino_t)))
-                       return -EFAULT;
+       if (bulkreq.lastip != NULL &&
+           copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t)))
+               return -EFAULT;
 
-               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-                       return -EFAULT;
+       if (bulkreq.ocount != NULL &&
+           copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32)))
+               return -EFAULT;
+
+       return 0;
+}
+
+/* Return -EFAULT if the copyout fails, else what xfs_ibulk_advance() returns */
+static int
+xfs_bulkstat_fmt(
+       struct xfs_ibulk                *breq,
+       const struct xfs_bulkstat       *bstat)
+{
+       if (copy_to_user(breq->ubuffer, bstat, sizeof(struct xfs_bulkstat)))
+               return -EFAULT;
+       return xfs_ibulk_advance(breq, sizeof(struct xfs_bulkstat));
+}
+
+/*
+ * Check the incoming bulk request @hdr from userspace and initialize the
+ * internal @breq bulk request appropriately.  Returns 0 if the bulk request
+ * should proceed; XFS_ITER_ABORT if there's nothing to do; or the usual
+ * negative error code.
+ */
+static int
+xfs_bulk_ireq_setup(
+       struct xfs_mount        *mp,
+       struct xfs_bulk_ireq    *hdr,
+       struct xfs_ibulk        *breq,
+       void __user             *ubuffer)
+{
+       if (hdr->icount == 0 ||
+           (hdr->flags & ~XFS_BULK_IREQ_FLAGS_ALL) ||
+           memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+               return -EINVAL;
+
+       breq->startino = hdr->ino;
+       breq->ubuffer = ubuffer;
+       breq->icount = hdr->icount;
+       breq->ocount = 0;
+       breq->flags = 0;
+
+       /*
+        * @ino is a special value: resolve it into breq->startino, which was
+        * seeded with the raw value above and is what the caller's walk
+        * starts from.  IREQ_AGNO is not allowed; only one inode is returned.
+        */
+       if (hdr->flags & XFS_BULK_IREQ_SPECIAL) {
+               if (hdr->flags & XFS_BULK_IREQ_AGNO)
+                       return -EINVAL;
+
+               switch (hdr->ino) {
+               case XFS_BULK_IREQ_SPECIAL_ROOT:
+                       breq->startino = mp->m_sb.sb_rootino;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               breq->icount = 1;
        }
 
+       /*
+        * The IREQ_AGNO flag means that we only want results from a given AG.
+        * If @hdr->ino is zero, we start iterating in that AG.  If @hdr->ino is
+        * beyond the specified AG then we return no results.
+        */
+       if (hdr->flags & XFS_BULK_IREQ_AGNO) {
+               if (hdr->agno >= mp->m_sb.sb_agcount)
+                       return -EINVAL;
+
+               if (breq->startino == 0)
+                       breq->startino = XFS_AGINO_TO_INO(mp, hdr->agno, 0);
+               else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno)
+                       return -EINVAL;
+
+               breq->flags |= XFS_IBULK_SAME_AG;
+
+               /* Asking for an inode past the end of the AG?  We're done! */
+               if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno)
+                       return XFS_ITER_ABORT;
+       } else if (hdr->agno)
+               return -EINVAL;
+
+       /* Asking for an inode past the end of the FS?  We're done! */
+       if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount)
+               return XFS_ITER_ABORT;
+
+       return 0;
+}
+
+/*
+ * Copy the end state of the internal bulk request @breq into the bulk request
+ * header @hdr: @hdr->ino takes @breq->startino, @hdr->ocount takes ->ocount.
+ */
+static void
+xfs_bulk_ireq_teardown(
+       struct xfs_bulk_ireq    *hdr,
+       struct xfs_ibulk        *breq)
+{
+       hdr->ino = breq->startino;
+       hdr->ocount = breq->ocount;
+}
+
+/* v5 bulkstat ioctl handler; needs CAP_SYS_ADMIN, -EIO on forced shutdown. */
+STATIC int
+xfs_ioc_bulkstat(
+       struct xfs_mount                *mp,
+       unsigned int                    cmd,
+       struct xfs_bulkstat_req __user  *arg)
+{
+       struct xfs_bulk_ireq            hdr;
+       struct xfs_ibulk                breq = {
+               .mp                     = mp,
+       };
+       int                             error;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
+               return -EFAULT;
+
+       error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat);
+       if (error == XFS_ITER_ABORT)
+               goto out_teardown;
+       if (error < 0)
+               return error;
+
+       error = xfs_bulkstat(&breq, xfs_bulkstat_fmt);
+       if (error)
+               return error;
+
+out_teardown:
+       xfs_bulk_ireq_teardown(&hdr, &breq);
+       if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr)))
+               return -EFAULT;
+
+       return 0;
+}
+
+STATIC int
+xfs_inumbers_fmt(
+       struct xfs_ibulk                *breq,
+       const struct xfs_inumbers       *igrp)
+{
+       if (copy_to_user(breq->ubuffer, igrp, sizeof(struct xfs_inumbers)))
+               return -EFAULT;
+       return xfs_ibulk_advance(breq, sizeof(struct xfs_inumbers));
+}
+
+/* v5 inumbers ioctl handler; needs CAP_SYS_ADMIN, -EIO on forced shutdown. */
+STATIC int
+xfs_ioc_inumbers(
+       struct xfs_mount                *mp,
+       unsigned int                    cmd,
+       struct xfs_inumbers_req __user  *arg)
+{
+       struct xfs_bulk_ireq            hdr;
+       struct xfs_ibulk                breq = {
+               .mp                     = mp,
+       };
+       int                             error;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
+               return -EFAULT;
+
+       error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers);
+       if (error == XFS_ITER_ABORT)
+               goto out_teardown;
+       if (error < 0)
+               return error;
+
+       error = xfs_inumbers(&breq, xfs_inumbers_fmt);
+       if (error)
+               return error;
+
+out_teardown:
+       xfs_bulk_ireq_teardown(&hdr, &breq);
+       if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr)))
+               return -EFAULT;
+
        return 0;
 }
 
@@ -1926,7 +2149,12 @@ xfs_file_ioctl(
        case XFS_IOC_FSBULKSTAT_SINGLE:
        case XFS_IOC_FSBULKSTAT:
        case XFS_IOC_FSINUMBERS:
+               return xfs_ioc_fsbulkstat(mp, cmd, arg);
+
+       case XFS_IOC_BULKSTAT:
                return xfs_ioc_bulkstat(mp, cmd, arg);
+       case XFS_IOC_INUMBERS:
+               return xfs_ioc_inumbers(mp, cmd, arg);
 
        case XFS_IOC_FSGEOMETRY_V1:
                return xfs_ioc_fsgeometry(mp, arg, 3);
index 4b17f67c888a057feabfba771e058e51fe791be3..654c0bb1bcf8981c1f863ec315a409c0906365c3 100644 (file)
@@ -77,4 +77,12 @@ xfs_set_dmattrs(
        uint                    evmask,
        uint16_t                state);
 
+struct xfs_ibulk;
+struct xfs_bstat;
+struct xfs_inogrp;
+
+int xfs_fsbulkstat_one_fmt(struct xfs_ibulk *breq,
+                          const struct xfs_bulkstat *bstat);
+int xfs_fsinumbers_fmt(struct xfs_ibulk *breq, const struct xfs_inumbers *igrp);
+
 #endif
index 614fc6886d24553328d5a08496e2cfc802af421f..7fcf7569743f47a250f0bf2356f7c040b78cc59c 100644 (file)
@@ -3,23 +3,19 @@
  * Copyright (c) 2004-2005 Silicon Graphics, Inc.
  * All Rights Reserved.
  */
-#include <linux/compat.h>
-#include <linux/ioctl.h>
 #include <linux/mount.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
 #include <linux/fsmap.h>
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_inode.h"
+#include "xfs_iwalk.h"
 #include "xfs_itable.h"
-#include "xfs_error.h"
 #include "xfs_fsops.h"
-#include "xfs_alloc.h"
 #include "xfs_rtalloc.h"
 #include "xfs_attr.h"
 #include "xfs_ioctl.h"
@@ -84,27 +80,26 @@ xfs_compat_growfs_rt_copyin(
 }
 
 STATIC int
-xfs_inumbers_fmt_compat(
-       void                    __user *ubuffer,
-       const struct xfs_inogrp *buffer,
-       long                    count,
-       long                    *written)
+xfs_fsinumbers_fmt_compat(
+       struct xfs_ibulk                *breq,
+       const struct xfs_inumbers       *ig)
 {
-       compat_xfs_inogrp_t     __user *p32 = ubuffer;
-       long                    i;
+       struct compat_xfs_inogrp __user *p32 = breq->ubuffer;
+       struct xfs_inogrp               ig1;
+       struct xfs_inogrp               *igrp = &ig1;
 
-       for (i = 0; i < count; i++) {
-               if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
-                   put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
-                   put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
-                       return -EFAULT;
-       }
-       *written = count * sizeof(*p32);
-       return 0;
+       xfs_inumbers_to_inogrp(&ig1, ig);
+
+       if (put_user(igrp->xi_startino,   &p32->xi_startino) ||
+           put_user(igrp->xi_alloccount, &p32->xi_alloccount) ||
+           put_user(igrp->xi_allocmask,  &p32->xi_allocmask))
+               return -EFAULT;
+
+       return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_inogrp));
 }
 
 #else
-#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
+#define xfs_fsinumbers_fmt_compat xfs_fsinumbers_fmt
 #endif /* BROKEN_X86_ALIGNMENT */
 
 STATIC int
@@ -121,11 +116,14 @@ xfs_ioctl32_bstime_copyin(
        return 0;
 }
 
-/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
+/*
+ * struct xfs_bstat has differing alignment on intel, & bstime_t sizes
+ * everywhere
+ */
 STATIC int
 xfs_ioctl32_bstat_copyin(
-       xfs_bstat_t             *bstat,
-       compat_xfs_bstat_t      __user *bstat32)
+       struct xfs_bstat                *bstat,
+       struct compat_xfs_bstat __user  *bstat32)
 {
        if (get_user(bstat->bs_ino,     &bstat32->bs_ino)       ||
            get_user(bstat->bs_mode,    &bstat32->bs_mode)      ||
@@ -171,16 +169,15 @@ xfs_bstime_store_compat(
 
 /* Return -EFAULT if a put_user fails, else what xfs_ibulk_advance() returns */
 STATIC int
-xfs_bulkstat_one_fmt_compat(
-       void                    __user *ubuffer,
-       int                     ubsize,
-       int                     *ubused,
-       const xfs_bstat_t       *buffer)
+xfs_fsbulkstat_one_fmt_compat(
+       struct xfs_ibulk                *breq,
+       const struct xfs_bulkstat       *bstat)
 {
-       compat_xfs_bstat_t      __user *p32 = ubuffer;
+       struct compat_xfs_bstat __user  *p32 = breq->ubuffer;
+       struct xfs_bstat                bs1;
+       struct xfs_bstat                *buffer = &bs1;
 
-       if (ubsize < sizeof(*p32))
-               return -ENOMEM;
+       xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat);
 
        if (put_user(buffer->bs_ino,      &p32->bs_ino)         ||
            put_user(buffer->bs_mode,     &p32->bs_mode)        ||
@@ -205,37 +202,24 @@ xfs_bulkstat_one_fmt_compat(
            put_user(buffer->bs_dmstate,  &p32->bs_dmstate)     ||
            put_user(buffer->bs_aextents, &p32->bs_aextents))
                return -EFAULT;
-       if (ubused)
-               *ubused = sizeof(*p32);
-       return 0;
-}
 
-STATIC int
-xfs_bulkstat_one_compat(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* buffer to place output in */
-       int             ubsize,         /* size of buffer */
-       int             *ubused,        /* bytes used by me */
-       int             *stat)          /* BULKSTAT_RV_... */
-{
-       return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
-                                   xfs_bulkstat_one_fmt_compat,
-                                   ubused, stat);
+       return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_bstat));
 }
 
 /* copied from xfs_ioctl.c */
 STATIC int
-xfs_compat_ioc_bulkstat(
+xfs_compat_ioc_fsbulkstat(
        xfs_mount_t               *mp,
        unsigned int              cmd,
-       compat_xfs_fsop_bulkreq_t __user *p32)
+       struct compat_xfs_fsop_bulkreq __user *p32)
 {
        u32                     addr;
-       xfs_fsop_bulkreq_t      bulkreq;
-       int                     count;  /* # of records returned */
-       xfs_ino_t               inlast; /* last inode number */
-       int                     done;
+       struct xfs_fsop_bulkreq bulkreq;
+       struct xfs_ibulk        breq = {
+               .mp             = mp,
+               .ocount         = 0,
+       };
+       xfs_ino_t               lastino;
        int                     error;
 
        /*
@@ -244,9 +228,8 @@ xfs_compat_ioc_bulkstat(
         * to userspace memory via bulkreq.ubuffer.  Normally the compat
         * functions and structure size are the correct ones to use ...
         */
-       inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat;
-       bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat;
-       size_t bs_one_size = sizeof(struct compat_xfs_bstat);
+       inumbers_fmt_pf         inumbers_func = xfs_fsinumbers_fmt_compat;
+       bulkstat_one_fmt_pf     bs_one_func = xfs_fsbulkstat_one_fmt_compat;
 
 #ifdef CONFIG_X86_X32
        if (in_x32_syscall()) {
@@ -258,9 +241,8 @@ xfs_compat_ioc_bulkstat(
                 * the data written out in compat layout will not match what
                 * x32 userspace expects.
                 */
-               inumbers_func = xfs_inumbers_fmt;
-               bs_one_func = xfs_bulkstat_one;
-               bs_one_size = sizeof(struct xfs_bstat);
+               inumbers_func = xfs_fsinumbers_fmt;
+               bs_one_func = xfs_fsbulkstat_one_fmt;
        }
 #endif
 
@@ -284,40 +266,55 @@ xfs_compat_ioc_bulkstat(
                return -EFAULT;
        bulkreq.ocount = compat_ptr(addr);
 
-       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+       if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
                return -EFAULT;
 
-       if ((count = bulkreq.icount) <= 0)
+       if (bulkreq.icount <= 0)
                return -EINVAL;
 
        if (bulkreq.ubuffer == NULL)
                return -EINVAL;
 
+       breq.ubuffer = bulkreq.ubuffer;
+       breq.icount = bulkreq.icount;
+
+       /*
+        * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
+        * that we want to stat.  However, FSINUMBERS and FSBULKSTAT expect
+        * that *lastip contains either zero or the number of the last inode to
+        * be examined by the previous call and return results starting with
+        * the next inode after that.  The new bulk request back end functions
+        * take the inode to start with, so we have to compute the startino
+        * parameter from lastino to maintain correct function.  lastino == 0
+        * is a special case because it has traditionally meant "first inode
+        * in filesystem".
+        */
        if (cmd == XFS_IOC_FSINUMBERS_32) {
-               error = xfs_inumbers(mp, &inlast, &count,
-                               bulkreq.ubuffer, inumbers_func);
+               breq.startino = lastino ? lastino + 1 : 0;
+               error = xfs_inumbers(&breq, inumbers_func);
+               lastino = breq.startino - 1;
        } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
-               int res;
-
-               error = bs_one_func(mp, inlast, bulkreq.ubuffer,
-                               bs_one_size, NULL, &res);
+               breq.startino = lastino;
+               breq.icount = 1;
+               error = xfs_bulkstat_one(&breq, bs_one_func);
+               lastino = breq.startino;
        } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
-               error = xfs_bulkstat(mp, &inlast, &count,
-                       bs_one_func, bs_one_size,
-                       bulkreq.ubuffer, &done);
-       } else
+               breq.startino = lastino ? lastino + 1 : 0;
+               error = xfs_bulkstat(&breq, bs_one_func);
+               lastino = breq.startino - 1;
+       } else {
                error = -EINVAL;
+       }
        if (error)
                return error;
 
-       if (bulkreq.ocount != NULL) {
-               if (copy_to_user(bulkreq.lastip, &inlast,
-                                               sizeof(xfs_ino_t)))
-                       return -EFAULT;
+       if (bulkreq.lastip != NULL &&
+           copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t)))
+               return -EFAULT;
 
-               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-                       return -EFAULT;
-       }
+       if (bulkreq.ocount != NULL &&
+           copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32)))
+               return -EFAULT;
 
        return 0;
 }
@@ -577,6 +574,8 @@ xfs_file_compat_ioctl(
        case XFS_IOC_ERROR_CLEARALL:
        case FS_IOC_GETFSMAP:
        case XFS_IOC_SCRUB_METADATA:
+       case XFS_IOC_BULKSTAT:
+       case XFS_IOC_INUMBERS:
                return xfs_file_ioctl(filp, cmd, p);
 #if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32)
        /*
@@ -674,7 +673,7 @@ xfs_file_compat_ioctl(
        case XFS_IOC_FSBULKSTAT_32:
        case XFS_IOC_FSBULKSTAT_SINGLE_32:
        case XFS_IOC_FSINUMBERS_32:
-               return xfs_compat_ioc_bulkstat(mp, cmd, arg);
+               return xfs_compat_ioc_fsbulkstat(mp, cmd, arg);
        case XFS_IOC_FD_TO_HANDLE_32:
        case XFS_IOC_PATH_TO_HANDLE_32:
        case XFS_IOC_PATH_TO_FSHANDLE_32: {
index d28fa824284aaf8198ad50404e86f4793bb70892..7985344d3aa619925a82312b4f387ee815e8f212 100644 (file)
@@ -36,7 +36,7 @@ typedef struct compat_xfs_bstime {
        __s32           tv_nsec;        /* and nanoseconds      */
 } compat_xfs_bstime_t;
 
-typedef struct compat_xfs_bstat {
+struct compat_xfs_bstat {
        __u64           bs_ino;         /* inode number                 */
        __u16           bs_mode;        /* type and mode                */
        __u16           bs_nlink;       /* number of links              */
@@ -61,14 +61,14 @@ typedef struct compat_xfs_bstat {
        __u32           bs_dmevmask;    /* DMIG event mask              */
        __u16           bs_dmstate;     /* DMIG state info              */
        __u16           bs_aextents;    /* attribute number of extents  */
-} __compat_packed compat_xfs_bstat_t;
+} __compat_packed;
 
-typedef struct compat_xfs_fsop_bulkreq {
+struct compat_xfs_fsop_bulkreq {
        compat_uptr_t   lastip;         /* last inode # pointer         */
        __s32           icount;         /* count of entries in buffer   */
        compat_uptr_t   ubuffer;        /* user buffer for inode desc.  */
        compat_uptr_t   ocount;         /* output count pointer         */
-} compat_xfs_fsop_bulkreq_t;
+};
 
 #define XFS_IOC_FSBULKSTAT_32 \
        _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
@@ -106,7 +106,7 @@ typedef struct compat_xfs_swapext {
        xfs_off_t               sx_offset;      /* offset into file */
        xfs_off_t               sx_length;      /* leng from offset */
        char                    sx_pad[16];     /* pad space, unused */
-       compat_xfs_bstat_t      sx_stat;        /* stat of target b4 copy */
+       struct compat_xfs_bstat sx_stat;        /* stat of target b4 copy */
 } __compat_packed compat_xfs_swapext_t;
 
 #define XFS_IOC_SWAPEXT_32     _IOWR('X', 109, struct compat_xfs_swapext)
@@ -201,11 +201,11 @@ typedef struct compat_xfs_fsop_geom_v1 {
 #define XFS_IOC_FSGEOMETRY_V1_32  \
        _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
 
-typedef struct compat_xfs_inogrp {
+struct compat_xfs_inogrp {
        __u64           xi_startino;    /* starting inode number        */
        __s32           xi_alloccount;  /* # bits set in allocmask      */
        __u64           xi_allocmask;   /* mask of allocated inodes     */
-} __attribute__((packed)) compat_xfs_inogrp_t;
+} __attribute__((packed));
 
 /* These growfs input structures have padding on the end, so must translate */
 typedef struct compat_xfs_growfs_data {
index 63d323916bba9e42dc3f37d81359b16a6821784b..3a4310d7cb59d4901d7519002f7eae03e5170ab5 100644 (file)
@@ -4,7 +4,6 @@
  * Copyright (c) 2016-2018 Christoph Hellwig.
  * All Rights Reserved.
  */
-#include <linux/iomap.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_shared.h"
@@ -12,7 +11,6 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_bmap_btree.h"
@@ -25,7 +23,6 @@
 #include "xfs_inode_item.h"
 #include "xfs_iomap.h"
 #include "xfs_trace.h"
-#include "xfs_icache.h"
 #include "xfs_quota.h"
 #include "xfs_dquot_item.h"
 #include "xfs_dquot.h"
@@ -779,7 +776,7 @@ xfs_iomap_write_unwritten(
                 * complete here and might deadlock on the iolock.
                 */
                error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
-                               XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
+                               XFS_TRANS_RESERVE, &tp);
                if (error)
                        return error;
 
index 74047bd0c1aeb44709ceae3ef779921778c4be0e..ff3c1fae53571e79d139e9117efc8ccc4ec751bd 100644 (file)
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
 #include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
 #include "xfs_acl.h"
 #include "xfs_quota.h"
-#include "xfs_error.h"
 #include "xfs_attr.h"
 #include "xfs_trans.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_symlink.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir2.h"
-#include "xfs_trans_space.h"
 #include "xfs_iomap.h"
-#include "xfs_defer.h"
 
-#include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/security.h>
-#include <linux/iomap.h>
-#include <linux/slab.h>
 #include <linux/iversion.h>
 
 /*
index 1e1a0af1dd34d2dc96c7f4b7f974add392f41bd9..a8a06bb78ea8e3c942d3d6c45f25f5999ff0a6c3 100644 (file)
 #include "xfs_btree.h"
 #include "xfs_ialloc.h"
 #include "xfs_ialloc_btree.h"
+#include "xfs_iwalk.h"
 #include "xfs_itable.h"
 #include "xfs_error.h"
-#include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_health.h"
 
 /*
- * Return stat information for one inode.
- * Return 0 if ok, else errno.
+ * Bulk Stat
+ * =========
+ *
+ * Use the inode walking functions to fill out struct xfs_bulkstat for every
+ * allocated inode, then pass the stat information to some externally provided
+ * iteration function.
  */
-int
+
+struct xfs_bstat_chunk {
+       bulkstat_one_fmt_pf     formatter;
+       struct xfs_ibulk        *breq;
+       struct xfs_bulkstat     *buf;
+};
+
+/*
+ * Fill out the bulkstat info for a single inode and report it somewhere.
+ *
+ * bc->breq->startino is effectively the inode cursor as we walk through the
+ * filesystem.  Therefore, we update it any time we need to move the cursor
+ * forward, regardless of whether or not we're sending any bstat information
+ * back to userspace.  If the inode is internal metadata or has been freed
+ * out from under us, we simply keep going.
+ *
+ * However, if any other type of error happens we want to stop right where we
+ * are so that userspace will call back with exact number of the bad inode and
+ * we can send back an error code.
+ *
+ * Note that if the formatter tells us there's no space left in the buffer we
+ * move the cursor forward and abort the walk.
+ */
+STATIC int
 xfs_bulkstat_one_int(
-       struct xfs_mount        *mp,            /* mount point for filesystem */
-       xfs_ino_t               ino,            /* inode to get data for */
-       void __user             *buffer,        /* buffer to place output in */
-       int                     ubsize,         /* size of buffer */
-       bulkstat_one_fmt_pf     formatter,      /* formatter, copy to user */
-       int                     *ubused,        /* bytes used by me */
-       int                     *stat)          /* BULKSTAT_RV_... */
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_ino_t               ino,
+       struct xfs_bstat_chunk  *bc)
 {
        struct xfs_icdinode     *dic;           /* dinode core info pointer */
        struct xfs_inode        *ip;            /* incore inode pointer */
        struct inode            *inode;
-       struct xfs_bstat        *buf;           /* return buffer */
-       int                     error = 0;      /* error value */
+       struct xfs_bulkstat     *buf = bc->buf;
+       int                     error = -EINVAL;
 
-       *stat = BULKSTAT_RV_NOTHING;
+       if (xfs_internal_inum(mp, ino))
+               goto out_advance;
 
-       if (!buffer || xfs_internal_inum(mp, ino))
-               return -EINVAL;
-
-       buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
-       if (!buf)
-               return -ENOMEM;
-
-       error = xfs_iget(mp, NULL, ino,
+       error = xfs_iget(mp, tp, ino,
                         (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
                         XFS_ILOCK_SHARED, &ip);
+       if (error == -ENOENT || error == -EINVAL)
+               goto out_advance;
        if (error)
-               goto out_free;
+               goto out;
 
        ASSERT(ip != NULL);
        ASSERT(ip->i_imap.im_blkno != 0);
@@ -64,37 +84,35 @@ xfs_bulkstat_one_int(
        /* xfs_iget returns the following without needing
         * further change.
         */
-       buf->bs_projid_lo = dic->di_projid_lo;
-       buf->bs_projid_hi = dic->di_projid_hi;
+       buf->bs_projectid = xfs_get_projid(ip);
        buf->bs_ino = ino;
        buf->bs_uid = dic->di_uid;
        buf->bs_gid = dic->di_gid;
        buf->bs_size = dic->di_size;
 
        buf->bs_nlink = inode->i_nlink;
-       buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
-       buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
-       buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
-       buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
-       buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
-       buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
+       buf->bs_atime = inode->i_atime.tv_sec;
+       buf->bs_atime_nsec = inode->i_atime.tv_nsec;
+       buf->bs_mtime = inode->i_mtime.tv_sec;
+       buf->bs_mtime_nsec = inode->i_mtime.tv_nsec;
+       buf->bs_ctime = inode->i_ctime.tv_sec;
+       buf->bs_ctime_nsec = inode->i_ctime.tv_nsec;
+       buf->bs_btime = dic->di_crtime.t_sec;
+       buf->bs_btime_nsec = dic->di_crtime.t_nsec;
        buf->bs_gen = inode->i_generation;
        buf->bs_mode = inode->i_mode;
 
        buf->bs_xflags = xfs_ip2xflags(ip);
-       buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
+       buf->bs_extsize_blks = dic->di_extsize;
        buf->bs_extents = dic->di_nextents;
-       memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
        xfs_bulkstat_health(ip, buf);
-       buf->bs_dmevmask = dic->di_dmevmask;
-       buf->bs_dmstate = dic->di_dmstate;
        buf->bs_aextents = dic->di_anextents;
        buf->bs_forkoff = XFS_IFORK_BOFF(ip);
+       buf->bs_version = XFS_BULKSTAT_VERSION_V5;
 
        if (dic->di_version == 3) {
                if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE)
-                       buf->bs_cowextsize = dic->di_cowextsize <<
-                                       mp->m_sb.sb_blocklog;
+                       buf->bs_cowextsize_blks = dic->di_cowextsize;
        }
 
        switch (dic->di_format) {
@@ -118,385 +136,121 @@ xfs_bulkstat_one_int(
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        xfs_irele(ip);
 
-       error = formatter(buffer, ubsize, ubused, buf);
-       if (!error)
-               *stat = BULKSTAT_RV_DIDONE;
+       error = bc->formatter(bc->breq, buf);
+       if (error == XFS_IBULK_ABORT)
+               goto out_advance;
+       if (error)
+               goto out;
 
- out_free:
-       kmem_free(buf);
+out_advance:
+       /*
+        * Advance the cursor to the inode that comes after the one we just
+        * looked at.  We want the caller to move along if the bulkstat
+        * information was copied successfully; if we tried to grab the inode
+        * but it's no longer allocated; or if it's internal metadata.
+        */
+       bc->breq->startino = ino + 1;
+out:
        return error;
 }
 
-/* Return 0 on success or positive error */
-STATIC int
-xfs_bulkstat_one_fmt(
-       void                    __user *ubuffer,
-       int                     ubsize,
-       int                     *ubused,
-       const xfs_bstat_t       *buffer)
-{
-       if (ubsize < sizeof(*buffer))
-               return -ENOMEM;
-       if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
-               return -EFAULT;
-       if (ubused)
-               *ubused = sizeof(*buffer);
-       return 0;
-}
-
+/* Bulkstat a single inode. */
 int
 xfs_bulkstat_one(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* buffer to place output in */
-       int             ubsize,         /* size of buffer */
-       int             *ubused,        /* bytes used by me */
-       int             *stat)          /* BULKSTAT_RV_... */
+       struct xfs_ibulk        *breq,
+       bulkstat_one_fmt_pf     formatter)
 {
-       return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
-                                   xfs_bulkstat_one_fmt, ubused, stat);
-}
+       struct xfs_bstat_chunk  bc = {
+               .formatter      = formatter,
+               .breq           = breq,
+       };
+       int                     error;
 
-/*
- * Loop over all clusters in a chunk for a given incore inode allocation btree
- * record.  Do a readahead if there are any allocated inodes in that cluster.
- */
-STATIC void
-xfs_bulkstat_ichunk_ra(
-       struct xfs_mount                *mp,
-       xfs_agnumber_t                  agno,
-       struct xfs_inobt_rec_incore     *irec)
-{
-       xfs_agblock_t                   agbno;
-       struct blk_plug                 plug;
-       int                             i;      /* inode chunk index */
-
-       agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
-
-       blk_start_plug(&plug);
-       for (i = 0; i < XFS_INODES_PER_CHUNK;
-            i += mp->m_inodes_per_cluster, agbno += mp->m_blocks_per_cluster) {
-               if (xfs_inobt_maskn(i, mp->m_inodes_per_cluster) &
-                   ~irec->ir_free) {
-                       xfs_btree_reada_bufs(mp, agno, agbno,
-                                       mp->m_blocks_per_cluster,
-                                       &xfs_inode_buf_ops);
-               }
-       }
-       blk_finish_plug(&plug);
-}
+       ASSERT(breq->icount == 1);
 
-/*
- * Lookup the inode chunk that the given inode lives in and then get the record
- * if we found the chunk.  If the inode was not the last in the chunk and there
- * are some left allocated, update the data for the pointed-to record as well as
- * return the count of grabbed inodes.
- */
-STATIC int
-xfs_bulkstat_grab_ichunk(
-       struct xfs_btree_cur            *cur,   /* btree cursor */
-       xfs_agino_t                     agino,  /* starting inode of chunk */
-       int                             *icount,/* return # of inodes grabbed */
-       struct xfs_inobt_rec_incore     *irec)  /* btree record */
-{
-       int                             idx;    /* index into inode chunk */
-       int                             stat;
-       int                             error = 0;
-
-       /* Lookup the inode chunk that this inode lives in */
-       error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat);
-       if (error)
-               return error;
-       if (!stat) {
-               *icount = 0;
-               return error;
-       }
+       bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
+                       KM_SLEEP | KM_MAYFAIL);
+       if (!bc.buf)
+               return -ENOMEM;
 
-       /* Get the record, should always work */
-       error = xfs_inobt_get_rec(cur, irec, &stat);
-       if (error)
-               return error;
-       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1);
+       error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc);
 
-       /* Check if the record contains the inode in request */
-       if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
-               *icount = 0;
-               return 0;
-       }
+       kmem_free(bc.buf);
 
-       idx = agino - irec->ir_startino + 1;
-       if (idx < XFS_INODES_PER_CHUNK &&
-           (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) {
-               int     i;
-
-               /* We got a right chunk with some left inodes allocated at it.
-                * Grab the chunk record.  Mark all the uninteresting inodes
-                * free -- because they're before our start point.
-                */
-               for (i = 0; i < idx; i++) {
-                       if (XFS_INOBT_MASK(i) & ~irec->ir_free)
-                               irec->ir_freecount++;
-               }
-
-               irec->ir_free |= xfs_inobt_maskn(0, idx);
-               *icount = irec->ir_count - irec->ir_freecount;
-       }
+       /*
+        * If we reported one inode to userspace then we abort because we hit
+        * the end of the buffer.  Don't leak that back to userspace.
+        */
+       if (error == XFS_IWALK_ABORT)
+               error = 0;
 
-       return 0;
+       return error;
 }
 
-#define XFS_BULKSTAT_UBLEFT(ubleft)    ((ubleft) >= statstruct_size)
-
-struct xfs_bulkstat_agichunk {
-       char            __user **ac_ubuffer;/* pointer into user's buffer */
-       int             ac_ubleft;      /* bytes left in user's buffer */
-       int             ac_ubelem;      /* spaces used in user's buffer */
-};
-
-/*
- * Process inodes in chunk with a pointer to a formatter function
- * that will iget the inode and fill in the appropriate structure.
- */
 static int
-xfs_bulkstat_ag_ichunk(
-       struct xfs_mount                *mp,
-       xfs_agnumber_t                  agno,
-       struct xfs_inobt_rec_incore     *irbp,
-       bulkstat_one_pf                 formatter,
-       size_t                          statstruct_size,
-       struct xfs_bulkstat_agichunk    *acp,
-       xfs_agino_t                     *last_agino)
+xfs_bulkstat_iwalk(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_ino_t               ino,
+       void                    *data)
 {
-       char                            __user **ubufp = acp->ac_ubuffer;
-       int                             chunkidx;
-       int                             error = 0;
-       xfs_agino_t                     agino = irbp->ir_startino;
-
-       for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
-            chunkidx++, agino++) {
-               int             fmterror;
-               int             ubused;
-
-               /* inode won't fit in buffer, we are done */
-               if (acp->ac_ubleft < statstruct_size)
-                       break;
-
-               /* Skip if this inode is free */
-               if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
-                       continue;
-
-               /* Get the inode and fill in a single buffer */
-               ubused = statstruct_size;
-               error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
-                                 *ubufp, acp->ac_ubleft, &ubused, &fmterror);
-
-               if (fmterror == BULKSTAT_RV_GIVEUP ||
-                   (error && error != -ENOENT && error != -EINVAL)) {
-                       acp->ac_ubleft = 0;
-                       ASSERT(error);
-                       break;
-               }
-
-               /* be careful not to leak error if at end of chunk */
-               if (fmterror == BULKSTAT_RV_NOTHING || error) {
-                       error = 0;
-                       continue;
-               }
-
-               *ubufp += ubused;
-               acp->ac_ubleft -= ubused;
-               acp->ac_ubelem++;
-       }
-
-       /*
-        * Post-update *last_agino. At this point, agino will always point one
-        * inode past the last inode we processed successfully. Hence we
-        * substract that inode when setting the *last_agino cursor so that we
-        * return the correct cookie to userspace. On the next bulkstat call,
-        * the inode under the lastino cookie will be skipped as we have already
-        * processed it here.
-        */
-       *last_agino = agino - 1;
+       int                     error;
 
+       error = xfs_bulkstat_one_int(mp, tp, ino, data);
+       /* bulkstat just skips over missing inodes */
+       if (error == -ENOENT || error == -EINVAL)
+               return 0;
        return error;
 }
 
 /*
- * Return stat information in bulk (by-inode) for the filesystem.
+ * Check the incoming startino parameter.
+ *
+ * We allow any inode value that could map to physical space inside the
+ * filesystem because if there are no inodes there, bulkstat moves on to the
+ * next chunk.  In other words, the magic agino value of zero takes us to the
+ * first chunk in the AG, and an agino value past the end of the AG takes us to
+ * the first chunk in the next AG.
+ *
+ * Therefore we can end early if the requested inode is beyond the end of the
+ * filesystem or doesn't map properly.
  */
-int                                    /* error status */
-xfs_bulkstat(
-       xfs_mount_t             *mp,    /* mount point for filesystem */
-       xfs_ino_t               *lastinop, /* last inode returned */
-       int                     *ubcountp, /* size of buffer/count returned */
-       bulkstat_one_pf         formatter, /* func that'd fill a single buf */
-       size_t                  statstruct_size, /* sizeof struct filling */
-       char                    __user *ubuffer, /* buffer with inode stats */
-       int                     *done)  /* 1 if there are more stats to get */
+static inline bool
+xfs_bulkstat_already_done(
+       struct xfs_mount        *mp,
+       xfs_ino_t               startino)
 {
-       xfs_buf_t               *agbp;  /* agi header buffer */
-       xfs_agino_t             agino;  /* inode # in allocation group */
-       xfs_agnumber_t          agno;   /* allocation group number */
-       xfs_btree_cur_t         *cur;   /* btree cursor for ialloc btree */
-       xfs_inobt_rec_incore_t  *irbuf; /* start of irec buffer */
-       int                     nirbuf; /* size of irbuf */
-       int                     ubcount; /* size of user's buffer */
-       struct xfs_bulkstat_agichunk ac;
-       int                     error = 0;
+       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, startino);
+       xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, startino);
 
-       /*
-        * Get the last inode value, see if there's nothing to do.
-        */
-       agno = XFS_INO_TO_AGNO(mp, *lastinop);
-       agino = XFS_INO_TO_AGINO(mp, *lastinop);
-       if (agno >= mp->m_sb.sb_agcount ||
-           *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
-               *done = 1;
-               *ubcountp = 0;
-               return 0;
-       }
+       return agno >= mp->m_sb.sb_agcount ||
+              startino != XFS_AGINO_TO_INO(mp, agno, agino);
+}
 
-       ubcount = *ubcountp; /* statstruct's */
-       ac.ac_ubuffer = &ubuffer;
-       ac.ac_ubleft = ubcount * statstruct_size; /* bytes */;
-       ac.ac_ubelem = 0;
+/* Return stat information in bulk (by-inode) for the filesystem. */
+int
+xfs_bulkstat(
+       struct xfs_ibulk        *breq,
+       bulkstat_one_fmt_pf     formatter)
+{
+       struct xfs_bstat_chunk  bc = {
+               .formatter      = formatter,
+               .breq           = breq,
+       };
+       int                     error;
 
-       *ubcountp = 0;
-       *done = 0;
+       if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+               return 0;
 
-       irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);
-       if (!irbuf)
+       bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
+                       KM_SLEEP | KM_MAYFAIL);
+       if (!bc.buf)
                return -ENOMEM;
-       nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);
 
-       /*
-        * Loop over the allocation groups, starting from the last
-        * inode returned; 0 means start of the allocation group.
-        */
-       while (agno < mp->m_sb.sb_agcount) {
-               struct xfs_inobt_rec_incore     *irbp = irbuf;
-               struct xfs_inobt_rec_incore     *irbufend = irbuf + nirbuf;
-               bool                            end_of_ag = false;
-               int                             icount = 0;
-               int                             stat;
-
-               error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
-               if (error)
-                       break;
-               /*
-                * Allocate and initialize a btree cursor for ialloc btree.
-                */
-               cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
-                                           XFS_BTNUM_INO);
-               if (agino > 0) {
-                       /*
-                        * In the middle of an allocation group, we need to get
-                        * the remainder of the chunk we're in.
-                        */
-                       struct xfs_inobt_rec_incore     r;
-
-                       error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
-                       if (error)
-                               goto del_cursor;
-                       if (icount) {
-                               irbp->ir_startino = r.ir_startino;
-                               irbp->ir_holemask = r.ir_holemask;
-                               irbp->ir_count = r.ir_count;
-                               irbp->ir_freecount = r.ir_freecount;
-                               irbp->ir_free = r.ir_free;
-                               irbp++;
-                       }
-                       /* Increment to the next record */
-                       error = xfs_btree_increment(cur, 0, &stat);
-               } else {
-                       /* Start of ag.  Lookup the first inode chunk */
-                       error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
-               }
-               if (error || stat == 0) {
-                       end_of_ag = true;
-                       goto del_cursor;
-               }
-
-               /*
-                * Loop through inode btree records in this ag,
-                * until we run out of inodes or space in the buffer.
-                */
-               while (irbp < irbufend && icount < ubcount) {
-                       struct xfs_inobt_rec_incore     r;
-
-                       error = xfs_inobt_get_rec(cur, &r, &stat);
-                       if (error || stat == 0) {
-                               end_of_ag = true;
-                               goto del_cursor;
-                       }
-
-                       /*
-                        * If this chunk has any allocated inodes, save it.
-                        * Also start read-ahead now for this chunk.
-                        */
-                       if (r.ir_freecount < r.ir_count) {
-                               xfs_bulkstat_ichunk_ra(mp, agno, &r);
-                               irbp->ir_startino = r.ir_startino;
-                               irbp->ir_holemask = r.ir_holemask;
-                               irbp->ir_count = r.ir_count;
-                               irbp->ir_freecount = r.ir_freecount;
-                               irbp->ir_free = r.ir_free;
-                               irbp++;
-                               icount += r.ir_count - r.ir_freecount;
-                       }
-                       error = xfs_btree_increment(cur, 0, &stat);
-                       if (error || stat == 0) {
-                               end_of_ag = true;
-                               goto del_cursor;
-                       }
-                       cond_resched();
-               }
-
-               /*
-                * Drop the btree buffers and the agi buffer as we can't hold any
-                * of the locks these represent when calling iget. If there is a
-                * pending error, then we are done.
-                */
-del_cursor:
-               xfs_btree_del_cursor(cur, error);
-               xfs_buf_relse(agbp);
-               if (error)
-                       break;
-               /*
-                * Now format all the good inodes into the user's buffer. The
-                * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
-                * for the next loop iteration.
-                */
-               irbufend = irbp;
-               for (irbp = irbuf;
-                    irbp < irbufend && ac.ac_ubleft >= statstruct_size;
-                    irbp++) {
-                       error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
-                                       formatter, statstruct_size, &ac,
-                                       &agino);
-                       if (error)
-                               break;
-
-                       cond_resched();
-               }
-
-               /*
-                * If we've run out of space or had a formatting error, we
-                * are now done
-                */
-               if (ac.ac_ubleft < statstruct_size || error)
-                       break;
-
-               if (end_of_ag) {
-                       agno++;
-                       agino = 0;
-               }
-       }
-       /*
-        * Done, we're either out of filesystem or space to put the data.
-        */
-       kmem_free(irbuf);
-       *ubcountp = ac.ac_ubelem;
+       error = xfs_iwalk(breq->mp, NULL, breq->startino, breq->flags,
+                       xfs_bulkstat_iwalk, breq->icount, &bc);
+
+       kmem_free(bc.buf);
 
        /*
         * We found some inodes, so clear the error status and return them.
@@ -505,135 +259,136 @@ del_cursor:
         * triggered again and propagated to userspace as there will be no
         * formatted inodes in the buffer.
         */
-       if (ac.ac_ubelem)
+       if (breq->ocount > 0)
                error = 0;
 
-       /*
-        * If we ran out of filesystem, lastino will point off the end of
-        * the filesystem so the next call will return immediately.
-        */
-       *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
-       if (agno >= mp->m_sb.sb_agcount)
-               *done = 1;
-
        return error;
 }
 
-int
-xfs_inumbers_fmt(
-       void                    __user *ubuffer, /* buffer to write to */
-       const struct xfs_inogrp *buffer,        /* buffer to read from */
-       long                    count,          /* # of elements to read */
-       long                    *written)       /* # of bytes written */
+/* Convert bulkstat (v5) to bstat (v1). */
+void
+xfs_bulkstat_to_bstat(
+       struct xfs_mount                *mp,
+       struct xfs_bstat                *bs1,
+       const struct xfs_bulkstat       *bstat)
 {
-       if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
-               return -EFAULT;
-       *written = count * sizeof(*buffer);
-       return 0;
+       memset(bs1, 0, sizeof(struct xfs_bstat));
+       bs1->bs_ino = bstat->bs_ino;
+       bs1->bs_mode = bstat->bs_mode;
+       bs1->bs_nlink = bstat->bs_nlink;
+       bs1->bs_uid = bstat->bs_uid;
+       bs1->bs_gid = bstat->bs_gid;
+       bs1->bs_rdev = bstat->bs_rdev;
+       bs1->bs_blksize = bstat->bs_blksize;
+       bs1->bs_size = bstat->bs_size;
+       bs1->bs_atime.tv_sec = bstat->bs_atime;
+       bs1->bs_mtime.tv_sec = bstat->bs_mtime;
+       bs1->bs_ctime.tv_sec = bstat->bs_ctime;
+       bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec;
+       bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec;
+       bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec;
+       bs1->bs_blocks = bstat->bs_blocks;
+       bs1->bs_xflags = bstat->bs_xflags;
+       bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks);
+       bs1->bs_extents = bstat->bs_extents;
+       bs1->bs_gen = bstat->bs_gen;
+       bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF;
+       bs1->bs_forkoff = bstat->bs_forkoff;
+       bs1->bs_projid_hi = bstat->bs_projectid >> 16;
+       bs1->bs_sick = bstat->bs_sick;
+       bs1->bs_checked = bstat->bs_checked;
+       bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks);
+       bs1->bs_dmevmask = 0;
+       bs1->bs_dmstate = 0;
+       bs1->bs_aextents = bstat->bs_aextents;
+}
+
+struct xfs_inumbers_chunk {            /* xfs_inumbers_walk() callback context */
+       inumbers_fmt_pf         formatter;      /* emits one inumbers record */
+       struct xfs_ibulk        *breq;          /* request; startino is the cursor */
+};
+
+/*
+ * INUMBERS
+ * ========
+ * This is how we export inode btree records to userspace, so that XFS tools
+ * can figure out where inodes are allocated.
+ */
+
+/*
+ * Format the inode group structure and report it somewhere.
+ *
+ * Similar to xfs_bulkstat_one_int, breq->startino is the inode cursor as we
+ * walk through the filesystem so we move it forward unless there was a runtime
+ * error.  If the formatter tells us the buffer is now full we also move the
+ * cursor forward and abort the walk.
+ */
+STATIC int
+xfs_inumbers_walk(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,    /* not used by this callback */
+       xfs_agnumber_t          agno,
+       const struct xfs_inobt_rec_incore *irec,
+       void                    *data)  /* struct xfs_inumbers_chunk */
+{
+       struct xfs_inumbers     inogrp = {
+               .xi_startino    = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino),
+               .xi_alloccount  = irec->ir_count - irec->ir_freecount,
+               .xi_allocmask   = ~irec->ir_free, /* ir_free marks free inodes */
+               .xi_version     = XFS_INUMBERS_VERSION_V5,
+       };
+       struct xfs_inumbers_chunk *ic = data;
+       int                     error;
+
+       error = ic->formatter(ic->breq, &inogrp);
+       if (error && error != XFS_IBULK_ABORT)
+               return error;   /* real failure: cursor stays on this chunk */
+
+       ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) +
+                       XFS_INODES_PER_CHUNK;
+       return error;   /* 0, or XFS_IBULK_ABORT to stop the walk */
 }
 
 /*
  * Return inode number table for the filesystem.
  */
-int                                    /* error status */
+int
 xfs_inumbers(
-       struct xfs_mount        *mp,/* mount point for filesystem */
-       xfs_ino_t               *lastino,/* last inode returned */
-       int                     *count,/* size of buffer/count returned */
-       void                    __user *ubuffer,/* buffer with inode descriptions */
+       struct xfs_ibulk        *breq,
        inumbers_fmt_pf         formatter)
 {
-       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, *lastino);
-       xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, *lastino);
-       struct xfs_btree_cur    *cur = NULL;
-       struct xfs_buf          *agbp = NULL;
-       struct xfs_inogrp       *buffer;
-       int                     bcount;
-       int                     left = *count;
-       int                     bufidx = 0;
+       struct xfs_inumbers_chunk ic = {
+               .formatter      = formatter,
+               .breq           = breq,
+       };
        int                     error = 0;
 
-       *count = 0;
-       if (agno >= mp->m_sb.sb_agcount ||
-           *lastino != XFS_AGINO_TO_INO(mp, agno, agino))
-               return error;
+       if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+               return 0;
 
-       bcount = min(left, (int)(PAGE_SIZE / sizeof(*buffer)));
-       buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP);
-       do {
-               struct xfs_inobt_rec_incore     r;
-               int                             stat;
-
-               if (!agbp) {
-                       error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
-                       if (error)
-                               break;
-
-                       cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
-                                                   XFS_BTNUM_INO);
-                       error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
-                                                &stat);
-                       if (error)
-                               break;
-                       if (!stat)
-                               goto next_ag;
-               }
-
-               error = xfs_inobt_get_rec(cur, &r, &stat);
-               if (error)
-                       break;
-               if (!stat)
-                       goto next_ag;
-
-               agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
-               buffer[bufidx].xi_startino =
-                       XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
-               buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;
-               buffer[bufidx].xi_allocmask = ~r.ir_free;
-               if (++bufidx == bcount) {
-                       long    written;
-
-                       error = formatter(ubuffer, buffer, bufidx, &written);
-                       if (error)
-                               break;
-                       ubuffer += written;
-                       *count += bufidx;
-                       bufidx = 0;
-               }
-               if (!--left)
-                       break;
-
-               error = xfs_btree_increment(cur, 0, &stat);
-               if (error)
-                       break;
-               if (stat)
-                       continue;
-
-next_ag:
-               xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-               cur = NULL;
-               xfs_buf_relse(agbp);
-               agbp = NULL;
-               agino = 0;
-               agno++;
-       } while (agno < mp->m_sb.sb_agcount);
-
-       if (!error) {
-               if (bufidx) {
-                       long    written;
-
-                       error = formatter(ubuffer, buffer, bufidx, &written);
-                       if (!error)
-                               *count += bufidx;
-               }
-               *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
-       }
+       error = xfs_inobt_walk(breq->mp, NULL, breq->startino, breq->flags,
+                       xfs_inumbers_walk, breq->icount, &ic);
 
-       kmem_free(buffer);
-       if (cur)
-               xfs_btree_del_cursor(cur, error);
-       if (agbp)
-               xfs_buf_relse(agbp);
+       /*
+        * We found some inode groups, so clear the error status and return
+        * them.  The startino cursor will point directly at the inode that
+        * triggered any error that occurred, so on the next call the error
+        * will be triggered again and propagated to userspace as there will be
+        * no formatted inode groups in the buffer.
+        */
+       if (breq->ocount > 0)
+               error = 0;
 
        return error;
 }
+
+/*
+ * Convert an inumbers (v5) struct to an inogrp (v1) struct.
+ *
+ * @ig1 is completely overwritten, padding included; @ig is only read.
+ */
+void
+xfs_inumbers_to_inogrp(
+       struct xfs_inogrp               *ig1,
+       const struct xfs_inumbers       *ig)
+{
+       /*
+        * Zero the whole destination first, as xfs_bulkstat_to_bstat does.
+        * The field assignments below do not touch any padding the compiler
+        * places between members, so without this a caller that hands us an
+        * uninitialised (e.g. on-stack) struct and then copies it out would
+        * expose stale memory contents.
+        */
+       memset(ig1, 0, sizeof(struct xfs_inogrp));
+       ig1->xi_startino = ig->xi_startino;
+       ig1->xi_alloccount = ig->xi_alloccount;
+       ig1->xi_allocmask = ig->xi_allocmask;
+}
index 8a822285b6718f417d4ab3c04a0566d1dac3566a..e90c1fc5b981a7786d02c8112995fdf78eee8b47 100644 (file)
@@ -5,83 +5,55 @@
 #ifndef __XFS_ITABLE_H__
 #define        __XFS_ITABLE_H__
 
-/*
- * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat
- * structures (by the dmi library). This is a pointer to a formatter function
- * that will iget the inode and fill in the appropriate structure.
- * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c
- */
-typedef int (*bulkstat_one_pf)(struct xfs_mount        *mp,
-                              xfs_ino_t        ino,
-                              void             __user *buffer,
-                              int              ubsize,
-                              int              *ubused,
-                              int              *stat);
+/*
+ * In-memory representation of a userspace request for batch inode data.
+ * xfs_ibulk_advance() moves @ubuffer forward and bumps @ocount each time it
+ * is called.
+ */
+struct xfs_ibulk {
+       struct xfs_mount        *mp;
+       void __user             *ubuffer; /* user output buffer */
+       xfs_ino_t               startino; /* start with this inode */
+       unsigned int            icount;   /* number of elements in ubuffer */
+       unsigned int            ocount;   /* number of records returned */
+       unsigned int            flags;    /* XFS_IBULK_* flags, e.g. SAME_AG */
+};
+
+/* Only iterate within the same AG as startino */
+#define XFS_IBULK_SAME_AG      (XFS_IWALK_SAME_AG)
+
+/* Return value that means we want to abort the walk. */
+#define XFS_IBULK_ABORT                (XFS_IWALK_ABORT)
 
 /*
- * Values for stat return value.
+ * Advance the user buffer pointer by one record of the given size.  If the
+ * buffer is now full, return the appropriate error code.
  */
-#define BULKSTAT_RV_NOTHING    0
-#define BULKSTAT_RV_DIDONE     1
-#define BULKSTAT_RV_GIVEUP     2
+static inline int
+xfs_ibulk_advance(
+       struct xfs_ibulk        *breq,
+       size_t                  bytes)
+{
+       /* Move the user buffer pointer forward by @bytes. */
+       breq->ubuffer = (char __user *)breq->ubuffer + bytes;
+
+       /* Ask for the walk to stop once @ocount has caught up with @icount. */
+       if (++breq->ocount == breq->icount)
+               return XFS_IBULK_ABORT;
+       return 0;
+}
 
 /*
  * Return stat information in bulk (by-inode) for the filesystem.
  */
-int                                    /* error status */
-xfs_bulkstat(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       *lastino,       /* last inode returned */
-       int             *count,         /* size of buffer/count returned */
-       bulkstat_one_pf formatter,      /* func that'd fill a single buf */
-       size_t          statstruct_size,/* sizeof struct that we're filling */
-       char            __user *ubuffer,/* buffer with inode stats */
-       int             *done);         /* 1 if there are more stats to get */
-
-typedef int (*bulkstat_one_fmt_pf)(  /* used size in bytes or negative error */
-       void                    __user *ubuffer, /* buffer to write to */
-       int                     ubsize,          /* remaining user buffer sz */
-       int                     *ubused,         /* bytes used by formatter */
-       const xfs_bstat_t       *buffer);        /* buffer to read from */
-
-int
-xfs_bulkstat_one_int(
-       xfs_mount_t             *mp,
-       xfs_ino_t               ino,
-       void                    __user *buffer,
-       int                     ubsize,
-       bulkstat_one_fmt_pf     formatter,
-       int                     *ubused,
-       int                     *stat);
 
-int
-xfs_bulkstat_one(
-       xfs_mount_t             *mp,
-       xfs_ino_t               ino,
-       void                    __user *buffer,
-       int                     ubsize,
-       int                     *ubused,
-       int                     *stat);
+typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq,
+               const struct xfs_bulkstat *bstat);
 
-typedef int (*inumbers_fmt_pf)(
-       void                    __user *ubuffer, /* buffer to write to */
-       const xfs_inogrp_t      *buffer,        /* buffer to read from */
-       long                    count,          /* # of elements to read */
-       long                    *written);      /* # of bytes written */
+int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
+int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
+void xfs_bulkstat_to_bstat(struct xfs_mount *mp, struct xfs_bstat *bs1,
+               const struct xfs_bulkstat *bstat);
 
-int
-xfs_inumbers_fmt(
-       void                    __user *ubuffer, /* buffer to write to */
-       const xfs_inogrp_t      *buffer,        /* buffer to read from */
-       long                    count,          /* # of elements to read */
-       long                    *written);      /* # of bytes written */
+typedef int (*inumbers_fmt_pf)(struct xfs_ibulk *breq,
+               const struct xfs_inumbers *igrp);
 
-int                                    /* error status */
-xfs_inumbers(
-       xfs_mount_t             *mp,    /* mount point for filesystem */
-       xfs_ino_t               *last,  /* last inode returned */
-       int                     *count, /* size of buffer/count returned */
-       void                    __user *buffer, /* buffer with inode info */
-       inumbers_fmt_pf         formatter);
+int xfs_inumbers(struct xfs_ibulk *breq, inumbers_fmt_pf formatter);
+void xfs_inumbers_to_inogrp(struct xfs_inogrp *ig1,
+               const struct xfs_inumbers *ig);
 
 #endif /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
new file mode 100644 (file)
index 0000000..8c7d727
--- /dev/null
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_iwalk.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_icache.h"
+#include "xfs_health.h"
+#include "xfs_trans.h"
+#include "xfs_pwork.h"
+
+/*
+ * Walking Inodes in the Filesystem
+ * ================================
+ *
+ * This iterator function walks a subset of filesystem inodes in increasing
+ * order from @startino until there are no more inodes.  For each allocated
+ * inode it finds, it calls a walk function with the relevant inode number and
+ * a pointer to caller-provided data.  The walk function can return the usual
+ * negative error code to stop the iteration; 0 to continue the iteration; or
+ * XFS_IWALK_ABORT to stop the iteration.  This return value is returned to the
+ * caller.
+ *
+ * Internally, we allow the walk function to do anything, which means that we
+ * cannot maintain the inobt cursor or our lock on the AGI buffer.  We
+ * therefore cache the inobt records in kernel memory and only call the walk
+ * function when our memory buffer is full.  @nr_recs is the number of records
+ * that we've cached, and @sz_recs is the size of our cache.
+ *
+ * It is the responsibility of the walk function to ensure it accesses
+ * allocated inodes, as the inobt records may be stale by the time they are
+ * acted upon.
+ */
+
+struct xfs_iwalk_ag {
+       /*
+        * Parallel work control data.  The non-threaded walkers initialize
+        * this to XFS_PWORK_SINGLE_THREADED; the threaded walker queues it
+        * with xfs_pwork_queue().
+        */
+       struct xfs_pwork                pwork;
+
+       struct xfs_mount                *mp;
+       struct xfs_trans                *tp;
+
+       /* Where do we start the traversal? */
+       xfs_ino_t                       startino;
+
+       /* Array of inobt records we cache. */
+       struct xfs_inobt_rec_incore     *recs;
+
+       /* Number of entries allocated for the @recs array. */
+       unsigned int                    sz_recs;
+
+       /* Number of entries in the @recs array that are in use. */
+       unsigned int                    nr_recs;
+
+       /*
+        * Per-inode and per-inobt-record walk functions (either one may be
+        * NULL, in which case it is simply not called) and the caller's data
+        * pointer that is handed to them.
+        */
+       xfs_iwalk_fn                    iwalk_fn;
+       xfs_inobt_walk_fn               inobt_walk_fn;
+       void                            *data;
+
+       /*
+        * Make it look like the inodes up to startino are free so that
+        * bulkstat can start its inode iteration at the correct place without
+        * needing to special case everywhere.
+        */
+       unsigned int                    trim_start:1;
+
+       /* Skip inobt records whose ir_freecount equals ir_count? */
+       unsigned int                    skip_empty:1;
+};
+
+/*
+ * Issue readahead for every inode cluster in the chunk described by @irec
+ * that contains at least one inode not marked free in the record.  Clusters
+ * whose inodes are all free are passed over.
+ */
+STATIC void
+xfs_iwalk_ichunk_ra(
+       struct xfs_mount                *mp,
+       xfs_agnumber_t                  agno,
+       struct xfs_inobt_rec_incore     *irec)
+{
+       struct xfs_ino_geometry         *igeo = M_IGEO(mp);
+       xfs_inofree_t                   inuse = ~irec->ir_free;
+       xfs_agblock_t                   cluster_bno;
+       struct blk_plug                 plug;
+       int                             first = 0; /* first inode of cluster */
+
+       cluster_bno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
+
+       /* Submit all of the readahead for this chunk under one plug. */
+       blk_start_plug(&plug);
+       while (first < XFS_INODES_PER_CHUNK) {
+               /* Does this cluster hold any inode that is not free? */
+               if (inuse & xfs_inobt_maskn(first, igeo->inodes_per_cluster))
+                       xfs_btree_reada_bufs(mp, agno, cluster_bno,
+                                       igeo->blocks_per_cluster,
+                                       &xfs_inode_buf_ops);
+
+               cluster_bno += igeo->blocks_per_cluster;
+               first += igeo->inodes_per_cluster;
+       }
+       blk_finish_plug(&plug);
+}
+
+/*
+ * Make every inode in @irec that precedes @agino look free so that a walk
+ * resuming in the middle of an inode record does not visit those inodes
+ * again.  ir_freecount is bumped for each such inode that was not already
+ * marked free.
+ */
+STATIC void
+xfs_iwalk_adjust_start(
+       xfs_agino_t                     agino,  /* inode the walk resumes at */
+       struct xfs_inobt_rec_incore     *irec)  /* btree record */
+{
+       int                             nr_skip; /* inodes ahead of @agino */
+       int                             bit = 0;
+
+       nr_skip = agino - irec->ir_startino;
+
+       /* Account for the allocated inodes that are about to be hidden... */
+       while (bit < nr_skip) {
+               if (!(irec->ir_free & XFS_INOBT_MASK(bit)))
+                       irec->ir_freecount++;
+               bit++;
+       }
+
+       /* ...and then flag the whole leading range as free. */
+       irec->ir_free |= xfs_inobt_maskn(0, nr_skip);
+}
+
+/*
+ * Allocate the inobt record cache for a walk: room for @iwag->sz_recs
+ * records, starting out empty.  Returns 0, or -ENOMEM if the allocation
+ * fails.
+ */
+STATIC int
+xfs_iwalk_alloc(
+       struct xfs_iwalk_ag     *iwag)
+{
+       ASSERT(iwag->recs == NULL);
+
+       iwag->nr_recs = 0;
+       iwag->recs = kmem_alloc(
+                       iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore),
+                       KM_MAYFAIL);
+
+       return iwag->recs ? 0 : -ENOMEM;
+}
+
+/*
+ * Free memory we allocated for a walk.  The record cache pointer is reset to
+ * NULL afterwards, which is the state xfs_iwalk_alloc() asserts on entry.
+ */
+STATIC void
+xfs_iwalk_free(
+       struct xfs_iwalk_ag     *iwag)
+{
+       kmem_free(iwag->recs);
+       iwag->recs = NULL;
+}
+
+/*
+ * Run the walk functions over the cached inobt records.  For each record the
+ * inobt walk function (if set) is called first, and then the inode walk
+ * function (if set) is called for every inode the record does not mark free.
+ *
+ * Returns the first nonzero value produced by a walk function, or 0 when all
+ * records were processed or xfs_pwork_want_abort() asked us to stop.
+ */
+STATIC int
+xfs_iwalk_ag_recs(
+       struct xfs_iwalk_ag             *iwag)
+{
+       struct xfs_mount                *mp = iwag->mp;
+       struct xfs_trans                *tp = iwag->tp;
+       xfs_agnumber_t                  agno;
+       unsigned int                    rec;
+       unsigned int                    bit;
+       int                             error;
+
+       agno = XFS_INO_TO_AGNO(mp, iwag->startino);
+
+       for (rec = 0; rec < iwag->nr_recs; rec++) {
+               struct xfs_inobt_rec_incore     *irec = iwag->recs + rec;
+
+               trace_xfs_iwalk_ag_rec(mp, agno, irec);
+
+               if (xfs_pwork_want_abort(&iwag->pwork))
+                       return 0;
+
+               /* Per-record callback comes before any per-inode callback. */
+               if (iwag->inobt_walk_fn) {
+                       error = iwag->inobt_walk_fn(mp, tp, agno, irec,
+                                       iwag->data);
+                       if (error)
+                               return error;
+               }
+
+               if (!iwag->iwalk_fn)
+                       continue;
+
+               for (bit = 0; bit < XFS_INODES_PER_CHUNK; bit++) {
+                       if (xfs_pwork_want_abort(&iwag->pwork))
+                               return 0;
+
+                       /* Inodes flagged free in the record are not visited. */
+                       if (irec->ir_free & XFS_INOBT_MASK(bit))
+                               continue;
+
+                       error = iwag->iwalk_fn(mp, tp,
+                                       XFS_AGINO_TO_INO(mp, agno,
+                                               irec->ir_startino + bit),
+                                       iwag->data);
+                       if (error)
+                               return error;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Delete cursor and let go of AGI.  Either of *@curpp and *@agi_bpp may
+ * already be NULL, in which case it is left alone; whatever is released here
+ * is set to NULL.  @error is handed to xfs_btree_del_cursor().
+ */
+static inline void
+xfs_iwalk_del_inobt(
+       struct xfs_trans        *tp,
+       struct xfs_btree_cur    **curpp,
+       struct xfs_buf          **agi_bpp,
+       int                     error)
+{
+       if (*curpp) {
+               xfs_btree_del_cursor(*curpp, error);
+               *curpp = NULL;
+       }
+       if (*agi_bpp) {
+               xfs_trans_brelse(tp, *agi_bpp);
+               *agi_bpp = NULL;
+       }
+}
+
+/*
+ * Set ourselves up for walking inobt records starting from a given point in
+ * the filesystem.
+ *
+ * If caller passed in a nonzero start inode number, load the record from the
+ * inobt and (when @iwag->trim_start is set) make the record look like all the
+ * inodes before agino are free so that we skip them, and then move the cursor
+ * to the next inobt record.  This is how we support starting an iwalk in the
+ * middle of an inode chunk.
+ *
+ * If the caller passed in a start number of zero, move the cursor to the first
+ * inobt record.
+ *
+ * On success, *@has_more holds the result of the final lookup or cursor
+ * increment, and @iwag->nr_recs is 1 if a first record was staged in the
+ * cache and 0 otherwise.
+ *
+ * The caller is responsible for cleaning up the cursor and buffer pointer
+ * regardless of the error status.
+ */
+STATIC int
+xfs_iwalk_ag_start(
+       struct xfs_iwalk_ag     *iwag,
+       xfs_agnumber_t          agno,
+       xfs_agino_t             agino,
+       struct xfs_btree_cur    **curpp,
+       struct xfs_buf          **agi_bpp,
+       int                     *has_more)
+{
+       struct xfs_mount        *mp = iwag->mp;
+       struct xfs_trans        *tp = iwag->tp;
+       struct xfs_inobt_rec_incore *irec;
+       int                     error;
+
+       /* Set up a fresh cursor and empty the inobt cache. */
+       iwag->nr_recs = 0;
+       error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
+       if (error)
+               return error;
+
+       /* Starting at the beginning of the AG?  That's easy! */
+       if (agino == 0)
+               return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more);
+
+       /*
+        * Otherwise, we have to grab the inobt record where we left off, stuff
+        * the record into our cache, and then see if there are more records.
+        * We require a lookup cache of at least two elements so that the
+        * caller doesn't have to deal with tearing down the cursor to walk the
+        * records.
+        */
+       error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more);
+       if (error)
+               return error;
+
+       /*
+        * If the LE lookup at @agino yields no records, jump ahead to the
+        * inobt cursor increment to see if there are more records to process.
+        */
+       if (!*has_more)
+               goto out_advance;
+
+       /* Get the record, should always work */
+       irec = &iwag->recs[iwag->nr_recs];
+       error = xfs_inobt_get_rec(*curpp, irec, has_more);
+       if (error)
+               return error;
+       XFS_WANT_CORRUPTED_RETURN(mp, *has_more == 1);
+
+       /*
+        * If the LE lookup yielded an inobt record before the cursor position,
+        * skip it and see if there's another one after it.  The record was
+        * read into the cache slot but nr_recs is not bumped, so the slot will
+        * simply be reused.
+        */
+       if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
+               goto out_advance;
+
+       /*
+        * If agino fell in the middle of the inode record, make it look like
+        * the inodes up to agino are free so that we don't return them again.
+        */
+       if (iwag->trim_start)
+               xfs_iwalk_adjust_start(agino, irec);
+
+       /*
+        * The prefetch calculation is supposed to give us a large enough inobt
+        * record cache that this function can stage a partial first record and
+        * the loop body can cache a record without having to check for cache
+        * space until after it reads an inobt record.
+        */
+       iwag->nr_recs++;
+       ASSERT(iwag->nr_recs < iwag->sz_recs);
+
+out_advance:
+       return xfs_btree_increment(*curpp, 0, has_more);
+}
+
+/*
+ * Run the walk functions on all cached inobt records and then empty the
+ * cache.  The walk functions are allowed to do anything, so the inobt cursor
+ * and the AGI buffer are released before they are called.
+ *
+ * If *@has_more is nonzero on entry, there are more records to read in this
+ * AG: a fresh cursor is created and positioned at the first record past the
+ * last one that was cached (i.e. wherever the cursor would have been had we
+ * been able to just increment it), and *@has_more is updated with the result
+ * of that lookup.  If *@has_more is zero (or @has_more is NULL) there is
+ * nothing left to read, so the cursor and buffer stay released.
+ *
+ * Returns 0, a negative errno, or the nonzero value with which a walk function
+ * stopped the iteration.  On error the cache is not emptied.
+ */
+STATIC int
+xfs_iwalk_run_callbacks(
+       struct xfs_iwalk_ag             *iwag,
+       xfs_agnumber_t                  agno,
+       struct xfs_btree_cur            **curpp,
+       struct xfs_buf                  **agi_bpp,
+       int                             *has_more)
+{
+       struct xfs_mount                *mp = iwag->mp;
+       struct xfs_trans                *tp = iwag->tp;
+       struct xfs_inobt_rec_incore     *irec;
+       xfs_agino_t                     restart;
+       int                             error;
+
+       ASSERT(iwag->nr_recs > 0);
+
+       /* Delete cursor but remember the last record we cached... */
+       xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
+       irec = &iwag->recs[iwag->nr_recs - 1];
+       restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
+
+       error = xfs_iwalk_ag_recs(iwag);
+       if (error)
+               return error;
+
+       /* ...empty the cache... */
+       iwag->nr_recs = 0;
+
+       /*
+        * Only rebuild the cursor when the caller still expects more records.
+        * Test the flag itself and not merely the pointer, so that flushing
+        * the cache at the end of an AG does not read the AGI again and redo a
+        * lookup whose result nobody consumes.
+        */
+       if (!has_more || !*has_more)
+               return 0;
+
+       /* ...and recreate the cursor just past where we left off. */
+       error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
+       if (error)
+               return error;
+
+       return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
+}
+
+/*
+ * Walk all inodes in a single AG, from @iwag->startino to the end of the AG.
+ *
+ * Inobt records are read into @iwag->recs; whenever that cache fills up, and
+ * once more for any leftovers after the last record, the walk functions are
+ * run via xfs_iwalk_run_callbacks().  The cursor and AGI buffer are always
+ * released before returning.
+ *
+ * Returns 0, a negative errno, or the nonzero value returned by a walk
+ * function.  If xfs_pwork_want_abort() reports an abort, the loop is left
+ * immediately and any records still cached are not processed.
+ */
+STATIC int
+xfs_iwalk_ag(
+       struct xfs_iwalk_ag             *iwag)
+{
+       struct xfs_mount                *mp = iwag->mp;
+       struct xfs_trans                *tp = iwag->tp;
+       struct xfs_buf                  *agi_bp = NULL;
+       struct xfs_btree_cur            *cur = NULL;
+       xfs_agnumber_t                  agno;
+       xfs_agino_t                     agino;
+       int                             has_more;
+       int                             error = 0;
+
+       /* Set up our cursor at the right place in the inode btree. */
+       agno = XFS_INO_TO_AGNO(mp, iwag->startino);
+       agino = XFS_INO_TO_AGINO(mp, iwag->startino);
+       error = xfs_iwalk_ag_start(iwag, agno, agino, &cur, &agi_bp, &has_more);
+
+       while (!error && has_more) {
+               struct xfs_inobt_rec_incore     *irec;
+
+               cond_resched();
+               if (xfs_pwork_want_abort(&iwag->pwork))
+                       goto out;
+
+               /* Fetch the inobt record into the next free cache slot. */
+               irec = &iwag->recs[iwag->nr_recs];
+               error = xfs_inobt_get_rec(cur, irec, &has_more);
+               if (error || !has_more)
+                       break;
+
+               /*
+                * No allocated inodes in this chunk; skip it.  nr_recs is not
+                * bumped, so the slot is overwritten by the next record.
+                */
+               if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
+                       error = xfs_btree_increment(cur, 0, &has_more);
+                       if (error)
+                               break;
+                       continue;
+               }
+
+               /*
+                * Start readahead for this inode chunk in anticipation of
+                * walking the inodes.
+                */
+               if (iwag->iwalk_fn)
+                       xfs_iwalk_ichunk_ra(mp, agno, irec);
+
+               /*
+                * If there's space in the buffer for more records, increment
+                * the btree cursor and grab more.
+                */
+               if (++iwag->nr_recs < iwag->sz_recs) {
+                       error = xfs_btree_increment(cur, 0, &has_more);
+                       if (error || !has_more)
+                               break;
+                       continue;
+               }
+
+               /*
+                * Otherwise, we need to save cursor state and run the callback
+                * function on the cached records.  The run_callbacks function
+                * is supposed to return a cursor pointing to the record where
+                * we would be if we had been able to increment like above.
+                */
+               ASSERT(has_more);
+               error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp,
+                               &has_more);
+       }
+
+       if (iwag->nr_recs == 0 || error)
+               goto out;
+
+       /* Walk the unprocessed records in the cache. */
+       error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, &has_more);
+
+out:
+       xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error);
+       return error;
+}
+
+/*
+ * We experimentally determined that the reduction in ioctl call overhead
+ * diminishes when userspace asks for more than 2048 inodes, so we'll cap
+ * prefetch at this point.
+ */
+#define IWALK_MAX_INODE_PREFETCH       (2048U)
+
+/*
+ * Translate the number of inodes the caller wants prefetched into the number
+ * of inobt records to cache, which in turn bounds how many inodes we try to
+ * read ahead.  @inodes == 0 selects the maximum.  The result is never less
+ * than two.
+ */
+static inline unsigned int
+xfs_iwalk_prefetch(
+       unsigned int            inodes)
+{
+       unsigned int            nr_recs;
+
+       /*
+        * No hint from the caller means "as much as possible", and anything
+        * beyond the maximum is reined in so that we don't start an absurd
+        * amount of prefetch.
+        */
+       if (inodes == 0 || inodes > IWALK_MAX_INODE_PREFETCH)
+               inodes = IWALK_MAX_INODE_PREFETCH;
+
+       /* Work in whole inode chunks. */
+       inodes = round_up(inodes, XFS_INODES_PER_CHUNK);
+
+       /*
+        * Estimate the record count by assuming that chunks are, on average,
+        * about four-fifths full.  Most (but not all) filesystems keep their
+        * inode chunks completely full, so this deliberately asks for a few
+        * more records than strictly needed in order to keep readahead
+        * effective on aging filesystems:
+        *
+        * inobt = inodes / (INODES_PER_CHUNK * (4 / 5));
+        *
+        * The multiplication is done first because (4 / 5) would truncate to
+        * zero in integer arithmetic.
+        */
+       nr_recs = (inodes * 5) / (4 * XFS_INODES_PER_CHUNK);
+
+       /*
+        * Always leave room for two records: the one where the walk starts
+        * and the one after it.  The per-AG walk setup code relies on this.
+        */
+       return nr_recs > 2U ? nr_recs : 2U;
+}
+
+/*
+ * Walk all inodes in the filesystem starting from @startino.  The @iwalk_fn
+ * will be called for each allocated inode, being passed the inode's number and
+ * @data.  @inode_records is the number of inodes to prefetch for and controls
+ * how many inobt records' worth of inodes we try to readahead; pass zero to
+ * get the maximum.  If @flags contains XFS_IWALK_SAME_AG, the walk does not
+ * continue past the AG that contains @startino.
+ *
+ * Returns 0, a negative errno, or the nonzero value returned by @iwalk_fn.
+ */
+int
+xfs_iwalk(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_ino_t               startino,
+       unsigned int            flags,
+       xfs_iwalk_fn            iwalk_fn,
+       unsigned int            inode_records,
+       void                    *data)
+{
+       struct xfs_iwalk_ag     iwag = {
+               .mp             = mp,
+               .tp             = tp,
+               .iwalk_fn       = iwalk_fn,
+               .data           = data,
+               .startino       = startino,
+               .sz_recs        = xfs_iwalk_prefetch(inode_records),
+               .trim_start     = 1,
+               .skip_empty     = 1,
+               .pwork          = XFS_PWORK_SINGLE_THREADED,
+       };
+       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, startino);
+       int                     error;
+
+       ASSERT(agno < mp->m_sb.sb_agcount);
+       ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
+
+       error = xfs_iwalk_alloc(&iwag);
+       if (error)
+               return error;
+
+       for (; agno < mp->m_sb.sb_agcount; agno++) {
+               error = xfs_iwalk_ag(&iwag);
+               if (error)
+                       break;
+
+               /* Every AG after the first is walked from its first inode. */
+               iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
+
+               /*
+                * Test the iwalk flag that the assertion above validates
+                * rather than its inobt-walk alias.
+                */
+               if (flags & XFS_IWALK_SAME_AG)
+                       break;
+       }
+
+       xfs_iwalk_free(&iwag);
+       return error;
+}
+
+/*
+ * Per-AG work function for xfs_iwalk_threaded().  Recovers the walk context
+ * that embeds @pwork, walks that AG unless xfs_pwork_want_abort() says to
+ * stop, and always frees the context before returning.
+ */
+static int
+xfs_iwalk_ag_work(
+       struct xfs_mount        *mp,
+       struct xfs_pwork        *pwork)
+{
+       struct xfs_iwalk_ag     *iwag;
+       int                     error = 0;
+
+       iwag = container_of(pwork, struct xfs_iwalk_ag, pwork);
+
+       if (!xfs_pwork_want_abort(pwork)) {
+               error = xfs_iwalk_alloc(iwag);
+               if (!error) {
+                       error = xfs_iwalk_ag(iwag);
+                       xfs_iwalk_free(iwag);
+               }
+       }
+
+       kmem_free(iwag);
+       return error;
+}
+
+/*
+ * Walk all the inodes in the filesystem using multiple threads to process each
+ * AG.  One work item is queued per AG, beginning with the AG that contains
+ * @startino; each item is freed by xfs_iwalk_ag_work() when it has run.
+ * @iwalk_fn, @inode_records and @data have the same meaning as for
+ * xfs_iwalk().  If @flags contains XFS_IWALK_SAME_AG, only the AG containing
+ * @startino is queued.  If @polled is true, xfs_pwork_poll() is called on the
+ * work queue before it is torn down.
+ *
+ * Returns the error from xfs_pwork_init() if setup fails, otherwise whatever
+ * xfs_pwork_destroy() returns.
+ */
+int
+xfs_iwalk_threaded(
+       struct xfs_mount        *mp,
+       xfs_ino_t               startino,
+       unsigned int            flags,
+       xfs_iwalk_fn            iwalk_fn,
+       unsigned int            inode_records,
+       bool                    polled,
+       void                    *data)
+{
+       struct xfs_pwork_ctl    pctl;
+       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, startino);
+       unsigned int            sz_recs = xfs_iwalk_prefetch(inode_records);
+       unsigned int            nr_threads;
+       int                     error;
+
+       ASSERT(agno < mp->m_sb.sb_agcount);
+       ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
+
+       nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
+       error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk",
+                       nr_threads);
+       if (error)
+               return error;
+
+       for (; agno < mp->m_sb.sb_agcount; agno++) {
+               struct xfs_iwalk_ag     *iwag;
+
+               if (xfs_pwork_ctl_want_abort(&pctl))
+                       break;
+
+               /*
+                * NOTE(review): unlike xfs_iwalk(), trim_start and skip_empty
+                * are left at zero here (the context is zero-allocated).
+                * Confirm that this is intended for callers that pass a
+                * nonzero @startino.
+                */
+               iwag = kmem_zalloc(sizeof(*iwag), KM_SLEEP);
+               iwag->mp = mp;
+               iwag->iwalk_fn = iwalk_fn;
+               iwag->data = data;
+               iwag->startino = startino;
+               iwag->sz_recs = sz_recs;
+               xfs_pwork_queue(&pctl, &iwag->pwork);
+
+               /* Every AG after the first is walked from its first inode. */
+               startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
+
+               /*
+                * Test the iwalk flag that the assertion above validates
+                * rather than its inobt-walk alias.
+                */
+               if (flags & XFS_IWALK_SAME_AG)
+                       break;
+       }
+
+       if (polled)
+               xfs_pwork_poll(&pctl);
+       return xfs_pwork_destroy(&pctl);
+}
+
+/*
+ * Allow callers to cache up to a page's worth of inobt records.  This reflects
+ * the existing inumbers prefetching behavior.  Since the inobt walk does not
+ * itself do anything with the inobt records, we can set a fairly high limit
+ * here.
+ */
+#define MAX_INOBT_WALK_PREFETCH        \
+       (PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore))
+
+/*
+ * Turn the number of records the caller asked for into the number of inobt
+ * records that we buffer in memory.  @inobt_records == 0 selects the maximum.
+ */
+static inline unsigned int
+xfs_inobt_walk_prefetch(
+       unsigned int            inobt_records)
+{
+       unsigned int            nr_recs = inobt_records;
+
+       /* No hint from the caller means "as much as possible". */
+       if (nr_recs == 0)
+               nr_recs = MAX_INOBT_WALK_PREFETCH;
+
+       /*
+        * First raise the count to at least two, so that both the record where
+        * the walk starts and the one after it can be cached (the per-AG walk
+        * setup code relies on this), and then cap it at the maximum so that
+        * we don't use an absurd amount of memory.
+        */
+       return min_t(unsigned int, max(nr_recs, 2U), MAX_INOBT_WALK_PREFETCH);
+}
+
+/*
+ * Walk all inode btree records in the filesystem starting from @startino.  The
+ * @inobt_walk_fn will be called for each btree record, being passed the incore
+ * record and @data.  @inobt_records controls how many inobt records we try to
+ * cache ahead of time; zero selects the maximum.  If @flags contains
+ * XFS_INOBT_WALK_SAME_AG, the walk does not continue past the AG that contains
+ * @startino.
+ *
+ * Returns 0, a negative errno, or the nonzero value returned by
+ * @inobt_walk_fn.
+ */
+int
+xfs_inobt_walk(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_ino_t               startino,
+       unsigned int            flags,
+       xfs_inobt_walk_fn       inobt_walk_fn,
+       unsigned int            inobt_records,
+       void                    *data)
+{
+       struct xfs_iwalk_ag     iwag = {
+               .mp             = mp,
+               .tp             = tp,
+               .inobt_walk_fn  = inobt_walk_fn,
+               .data           = data,
+               .startino       = startino,
+               .sz_recs        = xfs_inobt_walk_prefetch(inobt_records),
+               .pwork          = XFS_PWORK_SINGLE_THREADED,
+       };
+       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, startino);
+       int                     error;
+
+       ASSERT(agno < mp->m_sb.sb_agcount);
+       ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL));
+
+       error = xfs_iwalk_alloc(&iwag);
+       if (error)
+               return error;
+
+       while (agno < mp->m_sb.sb_agcount) {
+               error = xfs_iwalk_ag(&iwag);
+               if (error)
+                       break;
+
+               /* Move on to the first inode of the following AG. */
+               agno++;
+               iwag.startino = XFS_AGINO_TO_INO(mp, agno, 0);
+
+               if (flags & XFS_INOBT_WALK_SAME_AG)
+                       break;
+       }
+
+       xfs_iwalk_free(&iwag);
+       return error;
+}
diff --git a/fs/xfs/xfs_iwalk.h b/fs/xfs/xfs_iwalk.h
new file mode 100644 (file)
index 0000000..6c960e1
--- /dev/null
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_IWALK_H__
+#define __XFS_IWALK_H__
+
+/* Callback type for walking all inodes in the filesystem from @startino. */
+typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
+                           xfs_ino_t ino, void *data);
+/* Return values for xfs_iwalk_fn. */
+#define XFS_IWALK_CONTINUE     (XFS_ITER_CONTINUE)
+#define XFS_IWALK_ABORT                (XFS_ITER_ABORT)
+
+int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino,
+               unsigned int flags, xfs_iwalk_fn iwalk_fn,
+               unsigned int inode_records, void *data);
+int xfs_iwalk_threaded(struct xfs_mount *mp, xfs_ino_t startino,
+               unsigned int flags, xfs_iwalk_fn iwalk_fn,
+               unsigned int inode_records, bool poll, void *data);
+
+/* Only iterate inodes within the same AG as @startino. */
+#define XFS_IWALK_SAME_AG      (0x1)
+
+#define XFS_IWALK_FLAGS_ALL    (XFS_IWALK_SAME_AG)
+
+/* Callback type for walking all inobt records in the fs from @startino. */
+typedef int (*xfs_inobt_walk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
+                                xfs_agnumber_t agno,
+                                const struct xfs_inobt_rec_incore *irec,
+                                void *data);
+/* Return value (for xfs_inobt_walk_fn) that aborts the walk immediately. */
+#define XFS_INOBT_WALK_ABORT   (XFS_IWALK_ABORT)
+
+int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp,
+               xfs_ino_t startino, unsigned int flags,
+               xfs_inobt_walk_fn inobt_walk_fn, unsigned int inobt_records,
+               void *data);
+
+/* Only iterate inobt records within the same AG as @startino. */
+#define XFS_INOBT_WALK_SAME_AG (XFS_IWALK_SAME_AG)
+
+#define XFS_INOBT_WALK_FLAGS_ALL (XFS_INOBT_WALK_SAME_AG)
+
+#endif /* __XFS_IWALK_H__ */
index edbd5a210df22144ab810a67d88dac5f479b39f7..ca15105681cacb7c3677b2397baf729ca846d47c 100644 (file)
@@ -110,8 +110,6 @@ typedef __u32                       xfs_nlink_t;
 #define current_restore_flags_nested(sp, f)    \
                (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
 
-#define spinlock_destroy(lock)
-
 #define NBBY           8               /* number of bits per byte */
 
 /*
@@ -221,6 +219,9 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
        return x;
 }
 
+int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
+               char *data, unsigned int op);
+
 #define ASSERT_ALWAYS(expr)    \
        (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
 
index 2466b0f5b6c43f175c27e4952e1aed7a0698c607..00e9f5c388d366031fd8c5b713655a718c0a9287 100644 (file)
 #include "xfs_trans_priv.h"
 #include "xfs_log.h"
 #include "xfs_log_priv.h"
-#include "xfs_log_recover.h"
-#include "xfs_inode.h"
 #include "xfs_trace.h"
-#include "xfs_fsops.h"
-#include "xfs_cksum.h"
 #include "xfs_sysfs.h"
 #include "xfs_sb.h"
 #include "xfs_health.h"
@@ -45,21 +41,14 @@ STATIC int
 xlog_space_left(
        struct xlog             *log,
        atomic64_t              *head);
-STATIC int
-xlog_sync(
-       struct xlog             *log,
-       struct xlog_in_core     *iclog);
 STATIC void
 xlog_dealloc_log(
        struct xlog             *log);
 
 /* local state machine functions */
-STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
-STATIC void
-xlog_state_do_callback(
-       struct xlog             *log,
-       int                     aborted,
-       struct xlog_in_core     *iclog);
+STATIC void xlog_state_done_syncing(
+       struct xlog_in_core     *iclog,
+       bool                    aborted);
 STATIC int
 xlog_state_get_iclog_space(
        struct xlog             *log,
@@ -107,8 +96,7 @@ STATIC void
 xlog_verify_iclog(
        struct xlog             *log,
        struct xlog_in_core     *iclog,
-       int                     count,
-       bool                    syncing);
+       int                     count);
 STATIC void
 xlog_verify_tail_lsn(
        struct xlog             *log,
@@ -117,7 +105,7 @@ xlog_verify_tail_lsn(
 #else
 #define xlog_verify_dest_ptr(a,b)
 #define xlog_verify_grant_tail(a)
-#define xlog_verify_iclog(a,b,c,d)
+#define xlog_verify_iclog(a,b,c)
 #define xlog_verify_tail_lsn(a,b,c)
 #endif
 
@@ -541,32 +529,6 @@ xfs_log_done(
        return lsn;
 }
 
-/*
- * Attaches a new iclog I/O completion callback routine during
- * transaction commit.  If the log is in error state, a non-zero
- * return code is handed back and the caller is responsible for
- * executing the callback at an appropriate time.
- */
-int
-xfs_log_notify(
-       struct xlog_in_core     *iclog,
-       xfs_log_callback_t      *cb)
-{
-       int     abortflg;
-
-       spin_lock(&iclog->ic_callback_lock);
-       abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
-       if (!abortflg) {
-               ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
-                             (iclog->ic_state == XLOG_STATE_WANT_SYNC));
-               cb->cb_next = NULL;
-               *(iclog->ic_callback_tail) = cb;
-               iclog->ic_callback_tail = &(cb->cb_next);
-       }
-       spin_unlock(&iclog->ic_callback_lock);
-       return abortflg;
-}
-
 int
 xfs_log_release_iclog(
        struct xfs_mount        *mp,
@@ -807,16 +769,12 @@ xfs_log_mount_finish(
  * The mount has failed. Cancel the recovery if it hasn't completed and destroy
  * the log.
  */
-int
+void
 xfs_log_mount_cancel(
        struct xfs_mount        *mp)
 {
-       int                     error;
-
-       error = xlog_recover_cancel(mp->m_log);
+       xlog_recover_cancel(mp->m_log);
        xfs_log_unmount(mp);
-
-       return error;
 }
 
 /*
@@ -932,7 +890,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
         * Or, if we are doing a forced umount (typically because of IO errors).
         */
        if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
-           xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
+           xfs_readonly_buftarg(log->l_targ)) {
                ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
                return 0;
        }
@@ -1244,53 +1202,49 @@ xlog_space_left(
 }
 
 
-/*
- * Log function which is called when an io completes.
- *
- * The log manager needs its own routine, in order to control what
- * happens with the buffer after the write completes.
- */
 static void
-xlog_iodone(xfs_buf_t *bp)
+xlog_ioend_work(
+       struct work_struct      *work)
 {
-       struct xlog_in_core     *iclog = bp->b_log_item;
-       struct xlog             *l = iclog->ic_log;
-       int                     aborted = 0;
+       struct xlog_in_core     *iclog =
+               container_of(work, struct xlog_in_core, ic_end_io_work);
+       struct xlog             *log = iclog->ic_log;
+       bool                    aborted = false;
+       int                     error;
+
+       error = blk_status_to_errno(iclog->ic_bio.bi_status);
+#ifdef DEBUG
+       /* treat writes with injected CRC errors as failed */
+       if (iclog->ic_fail_crc)
+               error = -EIO;
+#endif
 
        /*
-        * Race to shutdown the filesystem if we see an error or the iclog is in
-        * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
-        * CRC errors into log recovery.
+        * Race to shutdown the filesystem if we see an error.
         */
-       if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) ||
-           iclog->ic_state & XLOG_STATE_IOABORT) {
-               if (iclog->ic_state & XLOG_STATE_IOABORT)
-                       iclog->ic_state &= ~XLOG_STATE_IOABORT;
-
-               xfs_buf_ioerror_alert(bp, __func__);
-               xfs_buf_stale(bp);
-               xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
+       if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
+               xfs_alert(log->l_mp, "log I/O error %d", error);
+               xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
                /*
                 * This flag will be propagated to the trans-committed
                 * callback routines to let them know that the log-commit
                 * didn't succeed.
                 */
-               aborted = XFS_LI_ABORTED;
+               aborted = true;
        } else if (iclog->ic_state & XLOG_STATE_IOERROR) {
-               aborted = XFS_LI_ABORTED;
+               aborted = true;
        }
 
-       /* log I/O is always issued ASYNC */
-       ASSERT(bp->b_flags & XBF_ASYNC);
        xlog_state_done_syncing(iclog, aborted);
+       bio_uninit(&iclog->ic_bio);
 
        /*
-        * drop the buffer lock now that we are done. Nothing references
-        * the buffer after this, so an unmount waiting on this lock can now
-        * tear it down safely. As such, it is unsafe to reference the buffer
-        * (bp) after the unlock as we could race with it being freed.
+        * Drop the lock to signal that we are done. Nothing references the
+        * iclog after this, so an unmount waiting on this lock can now tear it
+        * down safely. As such, it is unsafe to reference the iclog after the
+        * unlock as we could race with it being freed.
         */
-       xfs_buf_unlock(bp);
+       up(&iclog->ic_sema);
 }
 
 /*
@@ -1301,65 +1255,26 @@ xlog_iodone(xfs_buf_t *bp)
  * If the filesystem blocksize is too large, we may need to choose a
  * larger size since the directory code currently logs entire blocks.
  */
-
 STATIC void
 xlog_get_iclog_buffer_size(
        struct xfs_mount        *mp,
        struct xlog             *log)
 {
-       int size;
-       int xhdrs;
-
        if (mp->m_logbufs <= 0)
-               log->l_iclog_bufs = XLOG_MAX_ICLOGS;
-       else
-               log->l_iclog_bufs = mp->m_logbufs;
+               mp->m_logbufs = XLOG_MAX_ICLOGS;
+       if (mp->m_logbsize <= 0)
+               mp->m_logbsize = XLOG_BIG_RECORD_BSIZE;
+
+       log->l_iclog_bufs = mp->m_logbufs;
+       log->l_iclog_size = mp->m_logbsize;
 
        /*
-        * Buffer size passed in from mount system call.
+        * # headers = size / 32k - one header holds cycles from 32k of data.
         */
-       if (mp->m_logbsize > 0) {
-               size = log->l_iclog_size = mp->m_logbsize;
-               log->l_iclog_size_log = 0;
-               while (size != 1) {
-                       log->l_iclog_size_log++;
-                       size >>= 1;
-               }
-
-               if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-                       /* # headers = size / 32k
-                        * one header holds cycles from 32k of data
-                        */
-
-                       xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE;
-                       if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE)
-                               xhdrs++;
-                       log->l_iclog_hsize = xhdrs << BBSHIFT;
-                       log->l_iclog_heads = xhdrs;
-               } else {
-                       ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE);
-                       log->l_iclog_hsize = BBSIZE;
-                       log->l_iclog_heads = 1;
-               }
-               goto done;
-       }
-
-       /* All machines use 32kB buffers by default. */
-       log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
-       log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
-
-       /* the default log size is 16k or 32k which is one header sector */
-       log->l_iclog_hsize = BBSIZE;
-       log->l_iclog_heads = 1;
-
-done:
-       /* are we being asked to make the sizes selected above visible? */
-       if (mp->m_logbufs == 0)
-               mp->m_logbufs = log->l_iclog_bufs;
-       if (mp->m_logbsize == 0)
-               mp->m_logbsize = log->l_iclog_size;
-}      /* xlog_get_iclog_buffer_size */
-
+       log->l_iclog_heads =
+               DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE);
+       log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT;
+}
 
 void
 xfs_log_work_queue(
@@ -1422,7 +1337,6 @@ xlog_alloc_log(
        xlog_rec_header_t       *head;
        xlog_in_core_t          **iclogp;
        xlog_in_core_t          *iclog, *prev_iclog=NULL;
-       xfs_buf_t               *bp;
        int                     i;
        int                     error = -ENOMEM;
        uint                    log2_size = 0;
@@ -1480,30 +1394,6 @@ xlog_alloc_log(
 
        xlog_get_iclog_buffer_size(mp, log);
 
-       /*
-        * Use a NULL block for the extra log buffer used during splits so that
-        * it will trigger errors if we ever try to do IO on it without first
-        * having set it up properly.
-        */
-       error = -ENOMEM;
-       bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL,
-                          BTOBB(log->l_iclog_size), XBF_NO_IOACCT);
-       if (!bp)
-               goto out_free_log;
-
-       /*
-        * The iclogbuf buffer locks are held over IO but we are not going to do
-        * IO yet.  Hence unlock the buffer so that the log IO path can grab it
-        * when appropriately.
-        */
-       ASSERT(xfs_buf_islocked(bp));
-       xfs_buf_unlock(bp);
-
-       /* use high priority wq for log I/O completion */
-       bp->b_ioend_wq = mp->m_log_workqueue;
-       bp->b_iodone = xlog_iodone;
-       log->l_xbuf = bp;
-
        spin_lock_init(&log->l_icloglock);
        init_waitqueue_head(&log->l_flush_wait);
 
@@ -1516,29 +1406,22 @@ xlog_alloc_log(
         * xlog_in_core_t in xfs_log_priv.h for details.
         */
        ASSERT(log->l_iclog_size >= 4096);
-       for (i=0; i < log->l_iclog_bufs; i++) {
-               *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL);
-               if (!*iclogp)
+       for (i = 0; i < log->l_iclog_bufs; i++) {
+               size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
+                               sizeof(struct bio_vec);
+
+               iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL);
+               if (!iclog)
                        goto out_free_iclog;
 
-               iclog = *iclogp;
+               *iclogp = iclog;
                iclog->ic_prev = prev_iclog;
                prev_iclog = iclog;
 
-               bp = xfs_buf_get_uncached(mp->m_logdev_targp,
-                                         BTOBB(log->l_iclog_size),
-                                         XBF_NO_IOACCT);
-               if (!bp)
+               iclog->ic_data = kmem_alloc_large(log->l_iclog_size,
+                               KM_MAYFAIL);
+               if (!iclog->ic_data)
                        goto out_free_iclog;
-
-               ASSERT(xfs_buf_islocked(bp));
-               xfs_buf_unlock(bp);
-
-               /* use high priority wq for log I/O completion */
-               bp->b_ioend_wq = mp->m_log_workqueue;
-               bp->b_iodone = xlog_iodone;
-               iclog->ic_bp = bp;
-               iclog->ic_data = bp->b_addr;
 #ifdef DEBUG
                log->l_iclog_bak[i] = &iclog->ic_header;
 #endif
@@ -1552,36 +1435,43 @@ xlog_alloc_log(
                head->h_fmt = cpu_to_be32(XLOG_FMT);
                memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
 
-               iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize;
+               iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize;
                iclog->ic_state = XLOG_STATE_ACTIVE;
                iclog->ic_log = log;
                atomic_set(&iclog->ic_refcnt, 0);
                spin_lock_init(&iclog->ic_callback_lock);
-               iclog->ic_callback_tail = &(iclog->ic_callback);
+               INIT_LIST_HEAD(&iclog->ic_callbacks);
                iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
 
                init_waitqueue_head(&iclog->ic_force_wait);
                init_waitqueue_head(&iclog->ic_write_wait);
+               INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work);
+               sema_init(&iclog->ic_sema, 1);
 
                iclogp = &iclog->ic_next;
        }
        *iclogp = log->l_iclog;                 /* complete ring */
        log->l_iclog->ic_prev = prev_iclog;     /* re-write 1st prev ptr */
 
+       log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
+                       WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0,
+                       mp->m_fsname);
+       if (!log->l_ioend_workqueue)
+               goto out_free_iclog;
+
        error = xlog_cil_init(log);
        if (error)
-               goto out_free_iclog;
+               goto out_destroy_workqueue;
        return log;
 
+out_destroy_workqueue:
+       destroy_workqueue(log->l_ioend_workqueue);
 out_free_iclog:
        for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
                prev_iclog = iclog->ic_next;
-               if (iclog->ic_bp)
-                       xfs_buf_free(iclog->ic_bp);
+               kmem_free(iclog->ic_data);
                kmem_free(iclog);
        }
-       spinlock_destroy(&log->l_icloglock);
-       xfs_buf_free(log->l_xbuf);
 out_free_log:
        kmem_free(log);
 out:
@@ -1766,42 +1656,155 @@ xlog_cksum(
        return xfs_end_cksum(crc);
 }
 
-/*
- * The bdstrat callback function for log bufs. This gives us a central
- * place to trap bufs in case we get hit by a log I/O error and need to
- * shutdown. Actually, in practice, even when we didn't get a log error,
- * we transition the iclogs to IOERROR state *after* flushing all existing
- * iclogs to disk. This is because we don't want anymore new transactions to be
- * started or completed afterwards.
- *
- * We lock the iclogbufs here so that we can serialise against IO completion
- * during unmount. We might be processing a shutdown triggered during unmount,
- * and that can occur asynchronously to the unmount thread, and hence we need to
- * ensure that completes before tearing down the iclogbufs. Hence we need to
- * hold the buffer lock across the log IO to acheive that.
- */
-STATIC int
-xlog_bdstrat(
-       struct xfs_buf          *bp)
+static void
+xlog_bio_end_io(
+       struct bio              *bio)
 {
-       struct xlog_in_core     *iclog = bp->b_log_item;
+       struct xlog_in_core     *iclog = bio->bi_private;
 
-       xfs_buf_lock(bp);
-       if (iclog->ic_state & XLOG_STATE_IOERROR) {
-               xfs_buf_ioerror(bp, -EIO);
-               xfs_buf_stale(bp);
-               xfs_buf_ioend(bp);
+       queue_work(iclog->ic_log->l_ioend_workqueue,
+                  &iclog->ic_end_io_work);
+}
+
+static void
+xlog_map_iclog_data(
+       struct bio              *bio,
+       void                    *data,
+       size_t                  count)
+{
+       do {
+               struct page     *page = kmem_to_page(data);
+               unsigned int    off = offset_in_page(data);
+               size_t          len = min_t(size_t, count, PAGE_SIZE - off);
+
+               WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len);
+
+               data += len;
+               count -= len;
+       } while (count);
+}
+
+STATIC void
+xlog_write_iclog(
+       struct xlog             *log,
+       struct xlog_in_core     *iclog,
+       uint64_t                bno,
+       unsigned int            count,
+       bool                    need_flush)
+{
+       ASSERT(bno < log->l_logBBsize);
+
+       /*
+        * We lock the iclogbufs here so that we can serialise against I/O
+        * completion during unmount.  We might be processing a shutdown
+        * triggered during unmount, and that can occur asynchronously to the
+        * unmount thread, and hence we need to ensure that completes before
+        * tearing down the iclogbufs.  Hence we need to hold the buffer lock
+        * across the log IO to achieve that.
+        */
+       down(&iclog->ic_sema);
+       if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) {
                /*
                 * It would seem logical to return EIO here, but we rely on
                 * the log state machine to propagate I/O errors instead of
-                * doing it here. Similarly, IO completion will unlock the
-                * buffer, so we don't do it here.
+                * doing it here.  We kick off the state machine and unlock
+                * the buffer manually, the code needs to be kept in sync
+                * with the I/O completion path.
                 */
-               return 0;
+               xlog_state_done_syncing(iclog, true);
+               up(&iclog->ic_sema);
+               return;
        }
 
-       xfs_buf_submit(bp);
-       return 0;
+       iclog->ic_io_size = count;
+
+       bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
+       bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
+       iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
+       iclog->ic_bio.bi_end_io = xlog_bio_end_io;
+       iclog->ic_bio.bi_private = iclog;
+       iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA;
+       if (need_flush)
+               iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
+
+       xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size);
+       if (is_vmalloc_addr(iclog->ic_data))
+               flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size);
+
+       /*
+        * If this log buffer would straddle the end of the log we will have
+        * to split it up into two bios, so that we can continue at the start.
+        */
+       if (bno + BTOBB(count) > log->l_logBBsize) {
+               struct bio *split;
+
+               split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno,
+                                 GFP_NOIO, &fs_bio_set);
+               bio_chain(split, &iclog->ic_bio);
+               submit_bio(split);
+
+               /* restart at logical offset zero for the remainder */
+               iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart;
+       }
+
+       submit_bio(&iclog->ic_bio);
+}
+
+/*
+ * Bump the cycle number stored at the start of each BBSIZE-sized block in
+ * the part of the iclog that is written to the start of the log.  If the
+ * bumped value equals XLOG_HEADER_MAGIC_NUM, bump it once more to skip it.
+ */
+static void
+xlog_split_iclog(
+       struct xlog             *log,
+       void                    *data,
+       uint64_t                bno,
+       unsigned int            count)
+{
+       unsigned int            split_offset = BBTOB(log->l_logBBsize - bno);
+       unsigned int            i;
+
+       for (i = split_offset; i < count; i += BBSIZE) {
+               uint32_t cycle = get_unaligned_be32(data + i);
+
+               if (++cycle == XLOG_HEADER_MAGIC_NUM)
+                       cycle++;
+               put_unaligned_be32(cycle, data + i);
+       }
+}
+
+static int
+xlog_calc_iclog_size(
+       struct xlog             *log,
+       struct xlog_in_core     *iclog,
+       uint32_t                *roundoff)
+{
+       uint32_t                count_init, count;
+       bool                    use_lsunit;
+
+       use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
+                       log->l_mp->m_sb.sb_logsunit > 1;
+
+       /* Add for LR header */
+       count_init = log->l_iclog_hsize + iclog->ic_offset;
+
+       /* Round out the log write size */
+       if (use_lsunit) {
+               /* we have a v2 stripe unit to use */
+               count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
+       } else {
+               count = BBTOB(BTOBB(count_init));
+       }
+
+       ASSERT(count >= count_init);
+       *roundoff = count - count_init;
+
+       if (use_lsunit)
+               ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit);
+       else
+               ASSERT(*roundoff < BBTOB(1));
+       return count;
 }
 
 /*
@@ -1824,46 +1827,23 @@ xlog_bdstrat(
  * log will require grabbing the lock though.
  *
  * The entire log manager uses a logical block numbering scheme.  Only
- * log_sync (and then only bwrite()) know about the fact that the log may
- * not start with block zero on a given device.  The log block start offset
- * is added immediately before calling bwrite().
+ * xlog_write_iclog knows about the fact that the log may not start with
+ * block zero on a given device.
  */
-
-STATIC int
+STATIC void
 xlog_sync(
        struct xlog             *log,
        struct xlog_in_core     *iclog)
 {
-       xfs_buf_t       *bp;
-       int             i;
-       uint            count;          /* byte count of bwrite */
-       uint            count_init;     /* initial count before roundup */
-       int             roundoff;       /* roundoff to BB or stripe */
-       int             split = 0;      /* split write into two regions */
-       int             error;
-       int             v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
-       int             size;
+       unsigned int            count;          /* byte count of bwrite */
+       unsigned int            roundoff;       /* roundoff to BB or stripe */
+       uint64_t                bno;
+       unsigned int            size;
+       bool                    need_flush = true, split = false;
 
-       XFS_STATS_INC(log->l_mp, xs_log_writes);
        ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 
-       /* Add for LR header */
-       count_init = log->l_iclog_hsize + iclog->ic_offset;
-
-       /* Round out the log write size */
-       if (v2 && log->l_mp->m_sb.sb_logsunit > 1) {
-               /* we have a v2 stripe unit to use */
-               count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
-       } else {
-               count = BBTOB(BTOBB(count_init));
-       }
-       roundoff = count - count_init;
-       ASSERT(roundoff >= 0);
-       ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 && 
-                roundoff < log->l_mp->m_sb.sb_logsunit)
-               || 
-               (log->l_mp->m_sb.sb_logsunit <= 1 && 
-                roundoff < BBTOB(1)));
+       count = xlog_calc_iclog_size(log, iclog, &roundoff);
 
        /* move grant heads by roundoff in sync */
        xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
@@ -1874,41 +1854,19 @@ xlog_sync(
 
        /* real byte length */
        size = iclog->ic_offset;
-       if (v2)
+       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb))
                size += roundoff;
        iclog->ic_header.h_len = cpu_to_be32(size);
 
-       bp = iclog->ic_bp;
-       XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
-
+       XFS_STATS_INC(log->l_mp, xs_log_writes);
        XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
 
-       /* Do we need to split this write into 2 parts? */
-       if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
-               char            *dptr;
-
-               split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)));
-               count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp));
-               iclog->ic_bwritecnt = 2;
+       bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn));
 
-               /*
-                * Bump the cycle numbers at the start of each block in the
-                * part of the iclog that ends up in the buffer that gets
-                * written to the start of the log.
-                *
-                * Watch out for the header magic number case, though.
-                */
-               dptr = (char *)&iclog->ic_header + count;
-               for (i = 0; i < split; i += BBSIZE) {
-                       uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
-                       if (++cycle == XLOG_HEADER_MAGIC_NUM)
-                               cycle++;
-                       *(__be32 *)dptr = cpu_to_be32(cycle);
-
-                       dptr += BBSIZE;
-               }
-       } else {
-               iclog->ic_bwritecnt = 1;
+       /* Do we need to split this write into 2 parts? */
+       if (bno + BTOBB(count) > log->l_logBBsize) {
+               xlog_split_iclog(log, &iclog->ic_header, bno, count);
+               split = true;
        }
 
        /* calculcate the checksum */
@@ -1921,18 +1879,15 @@ xlog_sync(
         * write on I/O completion and shutdown the fs. The subsequent mount
         * detects the bad CRC and attempts to recover.
         */
+#ifdef DEBUG
        if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
                iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
-               iclog->ic_state |= XLOG_STATE_IOABORT;
+               iclog->ic_fail_crc = true;
                xfs_warn(log->l_mp,
        "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
                         be64_to_cpu(iclog->ic_header.h_lsn));
        }
-
-       bp->b_io_length = BTOBB(count);
-       bp->b_log_item = iclog;
-       bp->b_flags &= ~XBF_FLUSH;
-       bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
+#endif
 
        /*
         * Flush the data device before flushing the log to make sure all meta
@@ -1942,50 +1897,14 @@ xlog_sync(
         * synchronously here; for an internal log we can simply use the block
         * layer state machine for preflushes.
         */
-       if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
+       if (log->l_targ != log->l_mp->m_ddev_targp || split) {
                xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
-       else
-               bp->b_flags |= XBF_FLUSH;
-
-       ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
-       ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
-
-       xlog_verify_iclog(log, iclog, count, true);
-
-       /* account for log which doesn't start at block #0 */
-       XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
-
-       /*
-        * Don't call xfs_bwrite here. We do log-syncs even when the filesystem
-        * is shutting down.
-        */
-       error = xlog_bdstrat(bp);
-       if (error) {
-               xfs_buf_ioerror_alert(bp, "xlog_sync");
-               return error;
+               need_flush = false;
        }
-       if (split) {
-               bp = iclog->ic_log->l_xbuf;
-               XFS_BUF_SET_ADDR(bp, 0);             /* logical 0 */
-               xfs_buf_associate_memory(bp,
-                               (char *)&iclog->ic_header + count, split);
-               bp->b_log_item = iclog;
-               bp->b_flags &= ~XBF_FLUSH;
-               bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
-
-               ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
-               ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
-
-               /* account for internal log which doesn't start at block #0 */
-               XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
-               error = xlog_bdstrat(bp);
-               if (error) {
-                       xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
-                       return error;
-               }
-       }
-       return 0;
-}      /* xlog_sync */
+
+       xlog_verify_iclog(log, iclog, count);
+       xlog_write_iclog(log, iclog, bno, count, need_flush);
+}
 
 /*
  * Deallocate a log structure
@@ -2005,31 +1924,21 @@ xlog_dealloc_log(
         */
        iclog = log->l_iclog;
        for (i = 0; i < log->l_iclog_bufs; i++) {
-               xfs_buf_lock(iclog->ic_bp);
-               xfs_buf_unlock(iclog->ic_bp);
+               down(&iclog->ic_sema);
+               up(&iclog->ic_sema);
                iclog = iclog->ic_next;
        }
 
-       /*
-        * Always need to ensure that the extra buffer does not point to memory
-        * owned by another log buffer before we free it. Also, cycle the lock
-        * first to ensure we've completed IO on it.
-        */
-       xfs_buf_lock(log->l_xbuf);
-       xfs_buf_unlock(log->l_xbuf);
-       xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
-       xfs_buf_free(log->l_xbuf);
-
        iclog = log->l_iclog;
        for (i = 0; i < log->l_iclog_bufs; i++) {
-               xfs_buf_free(iclog->ic_bp);
                next_iclog = iclog->ic_next;
+               kmem_free(iclog->ic_data);
                kmem_free(iclog);
                iclog = next_iclog;
        }
-       spinlock_destroy(&log->l_icloglock);
 
        log->l_mp->m_log = NULL;
+       destroy_workqueue(log->l_ioend_workqueue);
        kmem_free(log);
 }      /* xlog_dealloc_log */
 
@@ -2610,7 +2519,7 @@ xlog_state_clean_log(
                if (iclog->ic_state == XLOG_STATE_DIRTY) {
                        iclog->ic_state = XLOG_STATE_ACTIVE;
                        iclog->ic_offset       = 0;
-                       ASSERT(iclog->ic_callback == NULL);
+                       ASSERT(list_empty_careful(&iclog->ic_callbacks));
                        /*
                         * If the number of ops in this iclog indicate it just
                         * contains the dummy transaction, we can
@@ -2680,37 +2589,32 @@ xlog_state_clean_log(
 
 STATIC xfs_lsn_t
 xlog_get_lowest_lsn(
-       struct xlog     *log)
+       struct xlog             *log)
 {
-       xlog_in_core_t  *lsn_log;
-       xfs_lsn_t       lowest_lsn, lsn;
+       struct xlog_in_core     *iclog = log->l_iclog;
+       xfs_lsn_t               lowest_lsn = 0, lsn;
 
-       lsn_log = log->l_iclog;
-       lowest_lsn = 0;
        do {
-           if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) {
-               lsn = be64_to_cpu(lsn_log->ic_header.h_lsn);
-               if ((lsn && !lowest_lsn) ||
-                   (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) {
+               if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))
+                       continue;
+
+               lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+               if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0)
                        lowest_lsn = lsn;
-               }
-           }
-           lsn_log = lsn_log->ic_next;
-       } while (lsn_log != log->l_iclog);
+       } while ((iclog = iclog->ic_next) != log->l_iclog);
+
        return lowest_lsn;
 }
 
-
 STATIC void
 xlog_state_do_callback(
        struct xlog             *log,
-       int                     aborted,
+       bool                    aborted,
        struct xlog_in_core     *ciclog)
 {
        xlog_in_core_t     *iclog;
        xlog_in_core_t     *first_iclog;        /* used to know when we've
                                                 * processed all iclogs once */
-       xfs_log_callback_t *cb, *cb_next;
        int                flushcnt = 0;
        xfs_lsn_t          lowest_lsn;
        int                ioerrors;    /* counter: iclogs with errors */
@@ -2821,7 +2725,7 @@ xlog_state_do_callback(
                                 */
                                ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
                                        be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
-                               if (iclog->ic_callback)
+                               if (!list_empty_careful(&iclog->ic_callbacks))
                                        atomic64_set(&log->l_last_sync_lsn,
                                                be64_to_cpu(iclog->ic_header.h_lsn));
 
@@ -2838,26 +2742,20 @@ xlog_state_do_callback(
                         * callbacks being added.
                         */
                        spin_lock(&iclog->ic_callback_lock);
-                       cb = iclog->ic_callback;
-                       while (cb) {
-                               iclog->ic_callback_tail = &(iclog->ic_callback);
-                               iclog->ic_callback = NULL;
-                               spin_unlock(&iclog->ic_callback_lock);
+                       while (!list_empty(&iclog->ic_callbacks)) {
+                               LIST_HEAD(tmp);
 
-                               /* perform callbacks in the order given */
-                               for (; cb; cb = cb_next) {
-                                       cb_next = cb->cb_next;
-                                       cb->cb_func(cb->cb_arg, aborted);
-                               }
+                               list_splice_init(&iclog->ic_callbacks, &tmp);
+
+                               spin_unlock(&iclog->ic_callback_lock);
+                               xlog_cil_process_committed(&tmp, aborted);
                                spin_lock(&iclog->ic_callback_lock);
-                               cb = iclog->ic_callback;
                        }
 
                        loopdidcallbacks++;
                        funcdidcallbacks++;
 
                        spin_lock(&log->l_icloglock);
-                       ASSERT(iclog->ic_callback == NULL);
                        spin_unlock(&iclog->ic_callback_lock);
                        if (!(iclog->ic_state & XLOG_STATE_IOERROR))
                                iclog->ic_state = XLOG_STATE_DIRTY;
@@ -2943,18 +2841,16 @@ xlog_state_do_callback(
  */
 STATIC void
 xlog_state_done_syncing(
-       xlog_in_core_t  *iclog,
-       int             aborted)
+       struct xlog_in_core     *iclog,
+       bool                    aborted)
 {
-       struct xlog        *log = iclog->ic_log;
+       struct xlog             *log = iclog->ic_log;
 
        spin_lock(&log->l_icloglock);
 
        ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
               iclog->ic_state == XLOG_STATE_IOERROR);
        ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
-       ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
-
 
        /*
         * If we got an error, either on the first buffer, or in the case of
@@ -2962,13 +2858,8 @@ xlog_state_done_syncing(
         * and none should ever be attempted to be written to disk
         * again.
         */
-       if (iclog->ic_state != XLOG_STATE_IOERROR) {
-               if (--iclog->ic_bwritecnt == 1) {
-                       spin_unlock(&log->l_icloglock);
-                       return;
-               }
+       if (iclog->ic_state != XLOG_STATE_IOERROR)
                iclog->ic_state = XLOG_STATE_DONE_SYNC;
-       }
 
        /*
         * Someone could be sleeping prior to writing out the next
@@ -3237,7 +3128,7 @@ xlog_state_release_iclog(
         * flags after this point.
         */
        if (sync)
-               return xlog_sync(log, iclog);
+               xlog_sync(log, iclog);
        return 0;
 }      /* xlog_state_release_iclog */
 
@@ -3828,8 +3719,7 @@ STATIC void
 xlog_verify_iclog(
        struct xlog             *log,
        struct xlog_in_core     *iclog,
-       int                     count,
-       bool                    syncing)
+       int                     count)
 {
        xlog_op_header_t        *ophead;
        xlog_in_core_t          *icptr;
@@ -3873,7 +3763,7 @@ xlog_verify_iclog(
                /* clientid is only 1 byte */
                p = &ophead->oh_clientid;
                field_offset = p - base_ptr;
-               if (!syncing || (field_offset & 0x1ff)) {
+               if (field_offset & 0x1ff) {
                        clientid = ophead->oh_clientid;
                } else {
                        idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
@@ -3896,7 +3786,7 @@ xlog_verify_iclog(
                /* check length */
                p = &ophead->oh_len;
                field_offset = p - base_ptr;
-               if (!syncing || (field_offset & 0x1ff)) {
+               if (field_offset & 0x1ff) {
                        op_len = be32_to_cpu(ophead->oh_len);
                } else {
                        idx = BTOBBT((uintptr_t)&ophead->oh_len -
@@ -4033,7 +3923,7 @@ xfs_log_force_umount(
         * avoid races.
         */
        wake_up_all(&log->l_cilp->xc_commit_wait);
-       xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
+       xlog_state_do_callback(log, true, NULL);
 
 #ifdef XFSERRORDEBUG
        {
index 73a64bf32f6f4a9214bebf2080d4a556ea634d1e..84e06805160f88d6d490153b3138e31a0c5e91b8 100644 (file)
@@ -6,6 +6,8 @@
 #ifndef        __XFS_LOG_H__
 #define __XFS_LOG_H__
 
+struct xfs_cil_ctx;
+
 struct xfs_log_vec {
        struct xfs_log_vec      *lv_next;       /* next lv in build list */
        int                     lv_niovecs;     /* number of iovecs in lv */
@@ -71,16 +73,6 @@ xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
        return buf;
 }
 
-/*
- * Structure used to pass callback function and the function's argument
- * to the log manager.
- */
-typedef struct xfs_log_callback {
-       struct xfs_log_callback *cb_next;
-       void                    (*cb_func)(void *, int);
-       void                    *cb_arg;
-} xfs_log_callback_t;
-
 /*
  * By comparing each component, we don't have to worry about extra
  * endian issues in treating two 32 bit numbers as one 64 bit number
@@ -125,12 +117,10 @@ int         xfs_log_mount(struct xfs_mount        *mp,
                        xfs_daddr_t             start_block,
                        int                     num_bblocks);
 int      xfs_log_mount_finish(struct xfs_mount *mp);
-int    xfs_log_mount_cancel(struct xfs_mount *);
+void   xfs_log_mount_cancel(struct xfs_mount *);
 xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
 xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
 void     xfs_log_space_wake(struct xfs_mount *mp);
-int      xfs_log_notify(struct xlog_in_core    *iclog,
-                        struct xfs_log_callback *callback_entry);
 int      xfs_log_release_iclog(struct xfs_mount *mp,
                         struct xlog_in_core     *iclog);
 int      xfs_log_reserve(struct xfs_mount *mp,
@@ -148,6 +138,7 @@ void          xfs_log_ticket_put(struct xlog_ticket *ticket);
 
 void   xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
                                xfs_lsn_t *commit_lsn, bool regrant);
+void   xlog_cil_process_committed(struct list_head *list, bool aborted);
 bool   xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
 
 void   xfs_log_work_queue(struct xfs_mount *mp);
index 5e595948bc5a6ffabad05fbfda64ef5413d36623..fa5602d0fd7f6567bd1091f81d134144c5abbcd9 100644 (file)
 #include "xfs_shared.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_error.h"
-#include "xfs_alloc.h"
 #include "xfs_extent_busy.h"
-#include "xfs_discard.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_log.h"
@@ -246,7 +243,8 @@ xfs_cil_prepare_item(
         * shadow buffer, so update the the pointer to it appropriately.
         */
        if (!old_lv) {
-               lv->lv_item->li_ops->iop_pin(lv->lv_item);
+               if (lv->lv_item->li_ops->iop_pin)
+                       lv->lv_item->li_ops->iop_pin(lv->lv_item);
                lv->lv_item->li_lv_shadow = NULL;
        } else if (old_lv != lv) {
                ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
@@ -576,10 +574,9 @@ xlog_discard_busy_extents(
  */
 static void
 xlog_cil_committed(
-       void    *args,
-       int     abort)
+       struct xfs_cil_ctx      *ctx,
+       bool                    abort)
 {
-       struct xfs_cil_ctx      *ctx = args;
        struct xfs_mount        *mp = ctx->cil->xc_log->l_mp;
 
        /*
@@ -614,6 +611,20 @@ xlog_cil_committed(
                kmem_free(ctx);
 }
 
+void
+xlog_cil_process_committed(
+       struct list_head        *list,
+       bool                    aborted)
+{
+       struct xfs_cil_ctx      *ctx;
+
+       while ((ctx = list_first_entry_or_null(list,
+                       struct xfs_cil_ctx, iclog_entry))) {
+               list_del(&ctx->iclog_entry);
+               xlog_cil_committed(ctx, aborted);
+       }
+}
+
 /*
  * Push the Committed Item List to the log. If @push_seq flag is zero, then it
  * is a background flush and so we can chose to ignore it. Otherwise, if the
@@ -835,12 +846,15 @@ restart:
        if (commit_lsn == -1)
                goto out_abort;
 
-       /* attach all the transactions w/ busy extents to iclog */
-       ctx->log_cb.cb_func = xlog_cil_committed;
-       ctx->log_cb.cb_arg = ctx;
-       error = xfs_log_notify(commit_iclog, &ctx->log_cb);
-       if (error)
+       spin_lock(&commit_iclog->ic_callback_lock);
+       if (commit_iclog->ic_state & XLOG_STATE_IOERROR) {
+               spin_unlock(&commit_iclog->ic_callback_lock);
                goto out_abort;
+       }
+       ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE ||
+                     commit_iclog->ic_state == XLOG_STATE_WANT_SYNC);
+       list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks);
+       spin_unlock(&commit_iclog->ic_callback_lock);
 
        /*
         * now the checkpoint commit is complete and we've attached the
@@ -864,7 +878,7 @@ out_skip:
 out_abort_free_ticket:
        xfs_log_ticket_put(tic);
 out_abort:
-       xlog_cil_committed(ctx, XFS_LI_ABORTED);
+       xlog_cil_committed(ctx, true);
        return -EIO;
 }
 
@@ -984,6 +998,7 @@ xfs_log_commit_cil(
 {
        struct xlog             *log = mp->m_log;
        struct xfs_cil          *cil = log->l_cilp;
+       struct xfs_log_item     *lip, *next;
        xfs_lsn_t               xc_commit_lsn;
 
        /*
@@ -1008,7 +1023,7 @@ xfs_log_commit_cil(
 
        /*
         * Once all the items of the transaction have been copied to the CIL,
-        * the items can be unlocked and freed.
+        * the items can be unlocked and possibly freed.
         *
         * This needs to be done before we drop the CIL context lock because we
         * have to update state in the log items and unlock them before they go
@@ -1017,8 +1032,12 @@ xfs_log_commit_cil(
         * the log items. This affects (at least) processing of stale buffers,
         * inodes and EFIs.
         */
-       xfs_trans_free_items(tp, xc_commit_lsn, false);
-
+       trace_xfs_trans_commit_items(tp, _RET_IP_);
+       list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
+               xfs_trans_del_item(lip);
+               if (lip->li_ops->iop_committing)
+                       lip->li_ops->iop_committing(lip, xc_commit_lsn);
+       }
        xlog_cil_push_background(log);
 
        up_read(&cil->xc_ctx_lock);
index b5f82cb362020c7451b823c2ff7d1e23840e12fd..b880c23cb6e4ffd78324ff26a2890c0010f67d64 100644 (file)
@@ -10,7 +10,6 @@ struct xfs_buf;
 struct xlog;
 struct xlog_ticket;
 struct xfs_mount;
-struct xfs_log_callback;
 
 /*
  * Flags for log structure
@@ -50,7 +49,6 @@ static inline uint xlog_get_client_id(__be32 i)
 #define XLOG_STATE_CALLBACK  0x0020 /* Callback functions now */
 #define XLOG_STATE_DIRTY     0x0040 /* Dirty IC log, not ready for ACTIVE status*/
 #define XLOG_STATE_IOERROR   0x0080 /* IO error happened in sync'ing log */
-#define XLOG_STATE_IOABORT   0x0100 /* force abort on I/O completion (debug) */
 #define XLOG_STATE_ALL      0x7FFF /* All possible valid flags */
 #define XLOG_STATE_NOTUSED   0x8000 /* This IC log not being used */
 
@@ -179,11 +177,10 @@ typedef struct xlog_ticket {
  *     the iclog.
  * - ic_forcewait is used to implement synchronous forcing of the iclog to disk.
  * - ic_next is the pointer to the next iclog in the ring.
- * - ic_bp is a pointer to the buffer used to write this incore log to disk.
  * - ic_log is a pointer back to the global log structure.
- * - ic_callback is a linked list of callback function/argument pairs to be
- *     called after an iclog finishes writing.
- * - ic_size is the full size of the header plus data.
+ * - ic_size is the full size of the log buffer, minus the cycle headers.
+ * - ic_io_size is the size of the currently pending log buffer write, which
+ *     might be smaller than ic_size
  * - ic_offset is the current number of bytes written to in this iclog.
  * - ic_refcnt is bumped when someone is writing to the log.
  * - ic_state is the state of the iclog.
@@ -193,7 +190,7 @@ typedef struct xlog_ticket {
  * structure cacheline aligned. The following fields can be contended on
  * by independent processes:
  *
- *     - ic_callback_*
+ *     - ic_callbacks
  *     - ic_refcnt
  *     - fields protected by the global l_icloglock
  *
@@ -206,23 +203,28 @@ typedef struct xlog_in_core {
        wait_queue_head_t       ic_write_wait;
        struct xlog_in_core     *ic_next;
        struct xlog_in_core     *ic_prev;
-       struct xfs_buf          *ic_bp;
        struct xlog             *ic_log;
-       int                     ic_size;
-       int                     ic_offset;
-       int                     ic_bwritecnt;
+       u32                     ic_size;
+       u32                     ic_io_size;
+       u32                     ic_offset;
        unsigned short          ic_state;
        char                    *ic_datap;      /* pointer to iclog data */
 
        /* Callback structures need their own cacheline */
        spinlock_t              ic_callback_lock ____cacheline_aligned_in_smp;
-       struct xfs_log_callback *ic_callback;
-       struct xfs_log_callback **ic_callback_tail;
+       struct list_head        ic_callbacks;
 
        /* reference counts need their own cacheline */
        atomic_t                ic_refcnt ____cacheline_aligned_in_smp;
        xlog_in_core_2_t        *ic_data;
 #define ic_header      ic_data->hic_header
+#ifdef DEBUG
+       bool                    ic_fail_crc : 1;
+#endif
+       struct semaphore        ic_sema;
+       struct work_struct      ic_end_io_work;
+       struct bio              ic_bio;
+       struct bio_vec          ic_bvec[];
 } xlog_in_core_t;
 
 /*
@@ -243,7 +245,7 @@ struct xfs_cil_ctx {
        int                     space_used;     /* aggregate size of regions */
        struct list_head        busy_extents;   /* busy extents in chkpt */
        struct xfs_log_vec      *lv_chain;      /* logvecs being pushed */
-       struct xfs_log_callback log_cb;         /* completion callback hook. */
+       struct list_head        iclog_entry;
        struct list_head        committing;     /* ctx committing list */
        struct work_struct      discard_endio_work;
 };
@@ -350,9 +352,8 @@ struct xlog {
        struct xfs_mount        *l_mp;          /* mount point */
        struct xfs_ail          *l_ailp;        /* AIL log is working with */
        struct xfs_cil          *l_cilp;        /* CIL log is working with */
-       struct xfs_buf          *l_xbuf;        /* extra buffer for log
-                                                * wrapping */
        struct xfs_buftarg      *l_targ;        /* buftarg of log */
+       struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */
        struct delayed_work     l_work;         /* background flush work */
        uint                    l_flags;
        uint                    l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
@@ -361,7 +362,6 @@ struct xlog {
        int                     l_iclog_heads;  /* # of iclog header sectors */
        uint                    l_sectBBsize;   /* sector size in BBs (2^n) */
        int                     l_iclog_size;   /* size of log in bytes */
-       int                     l_iclog_size_log; /* log power size of log */
        int                     l_iclog_bufs;   /* number of iclog buffers */
        xfs_daddr_t             l_logBBstart;   /* start block of log */
        int                     l_logsize;      /* size of log in bytes */
@@ -418,7 +418,7 @@ xlog_recover(
 extern int
 xlog_recover_finish(
        struct xlog             *log);
-extern int
+extern void
 xlog_recover_cancel(struct xlog *);
 
 extern __le32   xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
index 9329f5adbfbef28648f169ce216b034a64b7d3fc..13d1d3e95b888fb2630c784869bf932092a51666 100644 (file)
@@ -13,8 +13,6 @@
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_log.h"
@@ -26,7 +24,6 @@
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_quota.h"
-#include "xfs_cksum.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_bmap_btree.h"
@@ -79,7 +76,7 @@ struct xfs_buf_cancel {
  * are valid, false otherwise.
  */
 static inline bool
-xlog_verify_bp(
+xlog_verify_bno(
        struct xlog     *log,
        xfs_daddr_t     blk_no,
        int             bbcount)
@@ -92,22 +89,19 @@ xlog_verify_bp(
 }
 
 /*
- * Allocate a buffer to hold log data.  The buffer needs to be able
- * to map to a range of nbblks basic blocks at any valid (basic
- * block) offset within the log.
+ * Allocate a buffer to hold log data.  The buffer needs to be able to map to
+ * a range of nbblks basic blocks at any valid offset within the log.
  */
-STATIC xfs_buf_t *
-xlog_get_bp(
+static char *
+xlog_alloc_buffer(
        struct xlog     *log,
        int             nbblks)
 {
-       struct xfs_buf  *bp;
-
        /*
         * Pass log block 0 since we don't have an addr yet, buffer will be
         * verified on read.
         */
-       if (!xlog_verify_bp(log, 0, nbblks)) {
+       if (!xlog_verify_bno(log, 0, nbblks)) {
                xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
                        nbblks);
                XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
@@ -115,69 +109,48 @@ xlog_get_bp(
        }
 
        /*
-        * We do log I/O in units of log sectors (a power-of-2
-        * multiple of the basic block size), so we round up the
-        * requested size to accommodate the basic blocks required
-        * for complete log sectors.
+        * We do log I/O in units of log sectors (a power-of-2 multiple of the
+        * basic block size), so we round up the requested size to accommodate
+        * the basic blocks required for complete log sectors.
         *
-        * In addition, the buffer may be used for a non-sector-
-        * aligned block offset, in which case an I/O of the
-        * requested size could extend beyond the end of the
-        * buffer.  If the requested size is only 1 basic block it
-        * will never straddle a sector boundary, so this won't be
-        * an issue.  Nor will this be a problem if the log I/O is
-        * done in basic blocks (sector size 1).  But otherwise we
-        * extend the buffer by one extra log sector to ensure
-        * there's space to accommodate this possibility.
+        * In addition, the buffer may be used for a non-sector-aligned block
+        * offset, in which case an I/O of the requested size could extend
+        * beyond the end of the buffer.  If the requested size is only 1 basic
+        * block it will never straddle a sector boundary, so this won't be an
+        * issue.  Nor will this be a problem if the log I/O is done in basic
+        * blocks (sector size 1).  But otherwise we extend the buffer by one
+        * extra log sector to ensure there's space to accommodate this
+        * possibility.
         */
        if (nbblks > 1 && log->l_sectBBsize > 1)
                nbblks += log->l_sectBBsize;
        nbblks = round_up(nbblks, log->l_sectBBsize);
-
-       bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0);
-       if (bp)
-               xfs_buf_unlock(bp);
-       return bp;
-}
-
-STATIC void
-xlog_put_bp(
-       xfs_buf_t       *bp)
-{
-       xfs_buf_free(bp);
+       return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL);
 }
 
 /*
  * Return the address of the start of the given block number's data
  * in a log buffer.  The buffer covers a log sector-aligned region.
  */
-STATIC char *
+static inline unsigned int
 xlog_align(
        struct xlog     *log,
-       xfs_daddr_t     blk_no,
-       int             nbblks,
-       struct xfs_buf  *bp)
+       xfs_daddr_t     blk_no)
 {
-       xfs_daddr_t     offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
-
-       ASSERT(offset + nbblks <= bp->b_length);
-       return bp->b_addr + BBTOB(offset);
+       return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1));
 }
 
-
-/*
- * nbblks should be uint, but oh well.  Just want to catch that 32-bit length.
- */
-STATIC int
-xlog_bread_noalign(
-       struct xlog     *log,
-       xfs_daddr_t     blk_no,
-       int             nbblks,
-       struct xfs_buf  *bp)
+static int
+xlog_do_io(
+       struct xlog             *log,
+       xfs_daddr_t             blk_no,
+       unsigned int            nbblks,
+       char                    *data,
+       unsigned int            op)
 {
-       int             error;
+       int                     error;
 
-       if (!xlog_verify_bp(log, blk_no, nbblks)) {
+       if (!xlog_verify_bno(log, blk_no, nbblks)) {
                xfs_warn(log->l_mp,
                         "Invalid log block/length (0x%llx, 0x%x) for buffer",
                         blk_no, nbblks);
@@ -187,107 +160,53 @@ xlog_bread_noalign(
 
        blk_no = round_down(blk_no, log->l_sectBBsize);
        nbblks = round_up(nbblks, log->l_sectBBsize);
-
        ASSERT(nbblks > 0);
-       ASSERT(nbblks <= bp->b_length);
-
-       XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
-       bp->b_flags |= XBF_READ;
-       bp->b_io_length = nbblks;
-       bp->b_error = 0;
 
-       error = xfs_buf_submit(bp);
-       if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
-               xfs_buf_ioerror_alert(bp, __func__);
+       error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no,
+                       BBTOB(nbblks), data, op);
+       if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) {
+               xfs_alert(log->l_mp,
+                         "log recovery %s I/O error at daddr 0x%llx len %d error %d",
+                         op == REQ_OP_WRITE ? "write" : "read",
+                         blk_no, nbblks, error);
+       }
        return error;
 }
 
 STATIC int
-xlog_bread(
+xlog_bread_noalign(
        struct xlog     *log,
        xfs_daddr_t     blk_no,
        int             nbblks,
-       struct xfs_buf  *bp,
-       char            **offset)
+       char            *data)
 {
-       int             error;
-
-       error = xlog_bread_noalign(log, blk_no, nbblks, bp);
-       if (error)
-               return error;
-
-       *offset = xlog_align(log, blk_no, nbblks, bp);
-       return 0;
+       return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
 }
 
-/*
- * Read at an offset into the buffer. Returns with the buffer in it's original
- * state regardless of the result of the read.
- */
 STATIC int
-xlog_bread_offset(
+xlog_bread(
        struct xlog     *log,
-       xfs_daddr_t     blk_no,         /* block to read from */
-       int             nbblks,         /* blocks to read */
-       struct xfs_buf  *bp,
-       char            *offset)
+       xfs_daddr_t     blk_no,
+       int             nbblks,
+       char            *data,
+       char            **offset)
 {
-       char            *orig_offset = bp->b_addr;
-       int             orig_len = BBTOB(bp->b_length);
-       int             error, error2;
-
-       error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
-       if (error)
-               return error;
-
-       error = xlog_bread_noalign(log, blk_no, nbblks, bp);
+       int             error;
 
-       /* must reset buffer pointer even on error */
-       error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
-       if (error)
-               return error;
-       return error2;
+       error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
+       if (!error)
+               *offset = data + xlog_align(log, blk_no);
+       return error;
 }
 
-/*
- * Write out the buffer at the given block for the given number of blocks.
- * The buffer is kept locked across the write and is returned locked.
- * This can only be used for synchronous log writes.
- */
 STATIC int
 xlog_bwrite(
        struct xlog     *log,
        xfs_daddr_t     blk_no,
        int             nbblks,
-       struct xfs_buf  *bp)
+       char            *data)
 {
-       int             error;
-
-       if (!xlog_verify_bp(log, blk_no, nbblks)) {
-               xfs_warn(log->l_mp,
-                        "Invalid log block/length (0x%llx, 0x%x) for buffer",
-                        blk_no, nbblks);
-               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
-               return -EFSCORRUPTED;
-       }
-
-       blk_no = round_down(blk_no, log->l_sectBBsize);
-       nbblks = round_up(nbblks, log->l_sectBBsize);
-
-       ASSERT(nbblks > 0);
-       ASSERT(nbblks <= bp->b_length);
-
-       XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
-       xfs_buf_hold(bp);
-       xfs_buf_lock(bp);
-       bp->b_io_length = nbblks;
-       bp->b_error = 0;
-
-       error = xfs_bwrite(bp);
-       if (error)
-               xfs_buf_ioerror_alert(bp, __func__);
-       xfs_buf_relse(bp);
-       return error;
+       return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE);
 }
 
 #ifdef DEBUG
@@ -377,10 +296,9 @@ xlog_recover_iodone(
                 * We're not going to bother about retrying
                 * this during recovery. One strike!
                 */
-               if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+               if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
                        xfs_buf_ioerror_alert(bp, __func__);
-                       xfs_force_shutdown(bp->b_target->bt_mount,
-                                               SHUTDOWN_META_IO_ERROR);
+                       xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
                }
        }
 
@@ -405,7 +323,7 @@ xlog_recover_iodone(
 STATIC int
 xlog_find_cycle_start(
        struct xlog     *log,
-       struct xfs_buf  *bp,
+       char            *buffer,
        xfs_daddr_t     first_blk,
        xfs_daddr_t     *last_blk,
        uint            cycle)
@@ -419,7 +337,7 @@ xlog_find_cycle_start(
        end_blk = *last_blk;
        mid_blk = BLK_AVG(first_blk, end_blk);
        while (mid_blk != first_blk && mid_blk != end_blk) {
-               error = xlog_bread(log, mid_blk, 1, bp, &offset);
+               error = xlog_bread(log, mid_blk, 1, buffer, &offset);
                if (error)
                        return error;
                mid_cycle = xlog_get_cycle(offset);
@@ -455,7 +373,7 @@ xlog_find_verify_cycle(
 {
        xfs_daddr_t     i, j;
        uint            cycle;
-       xfs_buf_t       *bp;
+       char            *buffer;
        xfs_daddr_t     bufblks;
        char            *buf = NULL;
        int             error = 0;
@@ -469,7 +387,7 @@ xlog_find_verify_cycle(
        bufblks = 1 << ffs(nbblks);
        while (bufblks > log->l_logBBsize)
                bufblks >>= 1;
-       while (!(bp = xlog_get_bp(log, bufblks))) {
+       while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
                bufblks >>= 1;
                if (bufblks < log->l_sectBBsize)
                        return -ENOMEM;
@@ -480,7 +398,7 @@ xlog_find_verify_cycle(
 
                bcount = min(bufblks, (start_blk + nbblks - i));
 
-               error = xlog_bread(log, i, bcount, bp, &buf);
+               error = xlog_bread(log, i, bcount, buffer, &buf);
                if (error)
                        goto out;
 
@@ -498,7 +416,7 @@ xlog_find_verify_cycle(
        *new_blk = -1;
 
 out:
-       xlog_put_bp(bp);
+       kmem_free(buffer);
        return error;
 }
 
@@ -522,7 +440,7 @@ xlog_find_verify_log_record(
        int                     extra_bblks)
 {
        xfs_daddr_t             i;
-       xfs_buf_t               *bp;
+       char                    *buffer;
        char                    *offset = NULL;
        xlog_rec_header_t       *head = NULL;
        int                     error = 0;
@@ -532,12 +450,14 @@ xlog_find_verify_log_record(
 
        ASSERT(start_blk != 0 || *last_blk != start_blk);
 
-       if (!(bp = xlog_get_bp(log, num_blks))) {
-               if (!(bp = xlog_get_bp(log, 1)))
+       buffer = xlog_alloc_buffer(log, num_blks);
+       if (!buffer) {
+               buffer = xlog_alloc_buffer(log, 1);
+               if (!buffer)
                        return -ENOMEM;
                smallmem = 1;
        } else {
-               error = xlog_bread(log, start_blk, num_blks, bp, &offset);
+               error = xlog_bread(log, start_blk, num_blks, buffer, &offset);
                if (error)
                        goto out;
                offset += ((num_blks - 1) << BBSHIFT);
@@ -554,7 +474,7 @@ xlog_find_verify_log_record(
                }
 
                if (smallmem) {
-                       error = xlog_bread(log, i, 1, bp, &offset);
+                       error = xlog_bread(log, i, 1, buffer, &offset);
                        if (error)
                                goto out;
                }
@@ -607,7 +527,7 @@ xlog_find_verify_log_record(
                *last_blk = i;
 
 out:
-       xlog_put_bp(bp);
+       kmem_free(buffer);
        return error;
 }
 
@@ -629,7 +549,7 @@ xlog_find_head(
        struct xlog     *log,
        xfs_daddr_t     *return_head_blk)
 {
-       xfs_buf_t       *bp;
+       char            *buffer;
        char            *offset;
        xfs_daddr_t     new_blk, first_blk, start_blk, last_blk, head_blk;
        int             num_scan_bblks;
@@ -659,20 +579,20 @@ xlog_find_head(
        }
 
        first_blk = 0;                  /* get cycle # of 1st block */
-       bp = xlog_get_bp(log, 1);
-       if (!bp)
+       buffer = xlog_alloc_buffer(log, 1);
+       if (!buffer)
                return -ENOMEM;
 
-       error = xlog_bread(log, 0, 1, bp, &offset);
+       error = xlog_bread(log, 0, 1, buffer, &offset);
        if (error)
-               goto bp_err;
+               goto out_free_buffer;
 
        first_half_cycle = xlog_get_cycle(offset);
 
        last_blk = head_blk = log_bbnum - 1;    /* get cycle # of last block */
-       error = xlog_bread(log, last_blk, 1, bp, &offset);
+       error = xlog_bread(log, last_blk, 1, buffer, &offset);
        if (error)
-               goto bp_err;
+               goto out_free_buffer;
 
        last_half_cycle = xlog_get_cycle(offset);
        ASSERT(last_half_cycle != 0);
@@ -740,9 +660,10 @@ xlog_find_head(
                 *                           ^ we want to locate this spot
                 */
                stop_on_cycle = last_half_cycle;
-               if ((error = xlog_find_cycle_start(log, bp, first_blk,
-                                               &head_blk, last_half_cycle)))
-                       goto bp_err;
+               error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk,
+                               last_half_cycle);
+               if (error)
+                       goto out_free_buffer;
        }
 
        /*
@@ -762,7 +683,7 @@ xlog_find_head(
                if ((error = xlog_find_verify_cycle(log,
                                                start_blk, num_scan_bblks,
                                                stop_on_cycle, &new_blk)))
-                       goto bp_err;
+                       goto out_free_buffer;
                if (new_blk != -1)
                        head_blk = new_blk;
        } else {                /* need to read 2 parts of log */
@@ -799,7 +720,7 @@ xlog_find_head(
                if ((error = xlog_find_verify_cycle(log, start_blk,
                                        num_scan_bblks - (int)head_blk,
                                        (stop_on_cycle - 1), &new_blk)))
-                       goto bp_err;
+                       goto out_free_buffer;
                if (new_blk != -1) {
                        head_blk = new_blk;
                        goto validate_head;
@@ -815,7 +736,7 @@ xlog_find_head(
                if ((error = xlog_find_verify_cycle(log,
                                        start_blk, (int)head_blk,
                                        stop_on_cycle, &new_blk)))
-                       goto bp_err;
+                       goto out_free_buffer;
                if (new_blk != -1)
                        head_blk = new_blk;
        }
@@ -834,13 +755,13 @@ validate_head:
                if (error == 1)
                        error = -EIO;
                if (error)
-                       goto bp_err;
+                       goto out_free_buffer;
        } else {
                start_blk = 0;
                ASSERT(head_blk <= INT_MAX);
                error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
                if (error < 0)
-                       goto bp_err;
+                       goto out_free_buffer;
                if (error == 1) {
                        /* We hit the beginning of the log during our search */
                        start_blk = log_bbnum - (num_scan_bblks - head_blk);
@@ -853,14 +774,14 @@ validate_head:
                        if (error == 1)
                                error = -EIO;
                        if (error)
-                               goto bp_err;
+                               goto out_free_buffer;
                        if (new_blk != log_bbnum)
                                head_blk = new_blk;
                } else if (error)
-                       goto bp_err;
+                       goto out_free_buffer;
        }
 
-       xlog_put_bp(bp);
+       kmem_free(buffer);
        if (head_blk == log_bbnum)
                *return_head_blk = 0;
        else
@@ -873,9 +794,8 @@ validate_head:
         */
        return 0;
 
- bp_err:
-       xlog_put_bp(bp);
-
+out_free_buffer:
+       kmem_free(buffer);
        if (error)
                xfs_warn(log->l_mp, "failed to find log head");
        return error;
@@ -895,7 +815,7 @@ xlog_rseek_logrec_hdr(
        xfs_daddr_t             head_blk,
        xfs_daddr_t             tail_blk,
        int                     count,
-       struct xfs_buf          *bp,
+       char                    *buffer,
        xfs_daddr_t             *rblk,
        struct xlog_rec_header  **rhead,
        bool                    *wrapped)
@@ -914,7 +834,7 @@ xlog_rseek_logrec_hdr(
         */
        end_blk = head_blk > tail_blk ? tail_blk : 0;
        for (i = (int) head_blk - 1; i >= end_blk; i--) {
-               error = xlog_bread(log, i, 1, bp, &offset);
+               error = xlog_bread(log, i, 1, buffer, &offset);
                if (error)
                        goto out_error;
 
@@ -933,7 +853,7 @@ xlog_rseek_logrec_hdr(
         */
        if (tail_blk >= head_blk && found != count) {
                for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
-                       error = xlog_bread(log, i, 1, bp, &offset);
+                       error = xlog_bread(log, i, 1, buffer, &offset);
                        if (error)
                                goto out_error;
 
@@ -969,7 +889,7 @@ xlog_seek_logrec_hdr(
        xfs_daddr_t             head_blk,
        xfs_daddr_t             tail_blk,
        int                     count,
-       struct xfs_buf          *bp,
+       char                    *buffer,
        xfs_daddr_t             *rblk,
        struct xlog_rec_header  **rhead,
        bool                    *wrapped)
@@ -988,7 +908,7 @@ xlog_seek_logrec_hdr(
         */
        end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
        for (i = (int) tail_blk; i <= end_blk; i++) {
-               error = xlog_bread(log, i, 1, bp, &offset);
+               error = xlog_bread(log, i, 1, buffer, &offset);
                if (error)
                        goto out_error;
 
@@ -1006,7 +926,7 @@ xlog_seek_logrec_hdr(
         */
        if (tail_blk > head_blk && found != count) {
                for (i = 0; i < (int) head_blk; i++) {
-                       error = xlog_bread(log, i, 1, bp, &offset);
+                       error = xlog_bread(log, i, 1, buffer, &offset);
                        if (error)
                                goto out_error;
 
@@ -1069,22 +989,22 @@ xlog_verify_tail(
        int                     hsize)
 {
        struct xlog_rec_header  *thead;
-       struct xfs_buf          *bp;
+       char                    *buffer;
        xfs_daddr_t             first_bad;
        int                     error = 0;
        bool                    wrapped;
        xfs_daddr_t             tmp_tail;
        xfs_daddr_t             orig_tail = *tail_blk;
 
-       bp = xlog_get_bp(log, 1);
-       if (!bp)
+       buffer = xlog_alloc_buffer(log, 1);
+       if (!buffer)
                return -ENOMEM;
 
        /*
         * Make sure the tail points to a record (returns positive count on
         * success).
         */
-       error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp,
+       error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer,
                        &tmp_tail, &thead, &wrapped);
        if (error < 0)
                goto out;
@@ -1113,8 +1033,8 @@ xlog_verify_tail(
                        break;
 
                /* skip to the next record; returns positive count on success */
-               error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp,
-                               &tmp_tail, &thead, &wrapped);
+               error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2,
+                               buffer, &tmp_tail, &thead, &wrapped);
                if (error < 0)
                        goto out;
 
@@ -1129,7 +1049,7 @@ xlog_verify_tail(
                "Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
                         orig_tail, *tail_blk);
 out:
-       xlog_put_bp(bp);
+       kmem_free(buffer);
        return error;
 }
 
@@ -1151,13 +1071,13 @@ xlog_verify_head(
        struct xlog             *log,
        xfs_daddr_t             *head_blk,      /* in/out: unverified head */
        xfs_daddr_t             *tail_blk,      /* out: tail block */
-       struct xfs_buf          *bp,
+       char                    *buffer,
        xfs_daddr_t             *rhead_blk,     /* start blk of last record */
        struct xlog_rec_header  **rhead,        /* ptr to last record */
        bool                    *wrapped)       /* last rec. wraps phys. log */
 {
        struct xlog_rec_header  *tmp_rhead;
-       struct xfs_buf          *tmp_bp;
+       char                    *tmp_buffer;
        xfs_daddr_t             first_bad;
        xfs_daddr_t             tmp_rhead_blk;
        int                     found;
@@ -1168,15 +1088,15 @@ xlog_verify_head(
         * Check the head of the log for torn writes. Search backwards from the
         * head until we hit the tail or the maximum number of log record I/Os
         * that could have been in flight at one time. Use a temporary buffer so
-        * we don't trash the rhead/bp pointers from the caller.
+        * we don't trash the rhead/buffer pointers from the caller.
         */
-       tmp_bp = xlog_get_bp(log, 1);
-       if (!tmp_bp)
+       tmp_buffer = xlog_alloc_buffer(log, 1);
+       if (!tmp_buffer)
                return -ENOMEM;
        error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
-                                     XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk,
-                                     &tmp_rhead, &tmp_wrapped);
-       xlog_put_bp(tmp_bp);
+                                     XLOG_MAX_ICLOGS, tmp_buffer,
+                                     &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped);
+       kmem_free(tmp_buffer);
        if (error < 0)
                return error;
 
@@ -1205,8 +1125,8 @@ xlog_verify_head(
                 * (i.e., the records with invalid CRC) if the cycle number
                 * matches the current cycle.
                 */
-               found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp,
-                                             rhead_blk, rhead, wrapped);
+               found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1,
+                               buffer, rhead_blk, rhead, wrapped);
                if (found < 0)
                        return found;
                if (found == 0)         /* XXX: right thing to do here? */
@@ -1266,7 +1186,7 @@ xlog_check_unmount_rec(
        xfs_daddr_t             *tail_blk,
        struct xlog_rec_header  *rhead,
        xfs_daddr_t             rhead_blk,
-       struct xfs_buf          *bp,
+       char                    *buffer,
        bool                    *clean)
 {
        struct xlog_op_header   *op_head;
@@ -1309,7 +1229,7 @@ xlog_check_unmount_rec(
        if (*head_blk == after_umount_blk &&
            be32_to_cpu(rhead->h_num_logops) == 1) {
                umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks);
-               error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+               error = xlog_bread(log, umount_data_blk, 1, buffer, &offset);
                if (error)
                        return error;
 
@@ -1388,7 +1308,7 @@ xlog_find_tail(
 {
        xlog_rec_header_t       *rhead;
        char                    *offset = NULL;
-       xfs_buf_t               *bp;
+       char                    *buffer;
        int                     error;
        xfs_daddr_t             rhead_blk;
        xfs_lsn_t               tail_lsn;
@@ -1402,11 +1322,11 @@ xlog_find_tail(
                return error;
        ASSERT(*head_blk < INT_MAX);
 
-       bp = xlog_get_bp(log, 1);
-       if (!bp)
+       buffer = xlog_alloc_buffer(log, 1);
+       if (!buffer)
                return -ENOMEM;
        if (*head_blk == 0) {                           /* special case */
-               error = xlog_bread(log, 0, 1, bp, &offset);
+               error = xlog_bread(log, 0, 1, buffer, &offset);
                if (error)
                        goto done;
 
@@ -1422,7 +1342,7 @@ xlog_find_tail(
         * block. This wraps all the way back around to the head so something is
         * seriously wrong if we can't find it.
         */
-       error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+       error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer,
                                      &rhead_blk, &rhead, &wrapped);
        if (error < 0)
                return error;
@@ -1443,7 +1363,7 @@ xlog_find_tail(
         * state to determine whether recovery is necessary.
         */
        error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
-                                      rhead_blk, bp, &clean);
+                                      rhead_blk, buffer, &clean);
        if (error)
                goto done;
 
@@ -1460,7 +1380,7 @@ xlog_find_tail(
        if (!clean) {
                xfs_daddr_t     orig_head = *head_blk;
 
-               error = xlog_verify_head(log, head_blk, tail_blk, bp,
+               error = xlog_verify_head(log, head_blk, tail_blk, buffer,
                                         &rhead_blk, &rhead, &wrapped);
                if (error)
                        goto done;
@@ -1471,7 +1391,7 @@ xlog_find_tail(
                                       wrapped);
                        tail_lsn = atomic64_read(&log->l_tail_lsn);
                        error = xlog_check_unmount_rec(log, head_blk, tail_blk,
-                                                      rhead, rhead_blk, bp,
+                                                      rhead, rhead_blk, buffer,
                                                       &clean);
                        if (error)
                                goto done;
@@ -1505,11 +1425,11 @@ xlog_find_tail(
         * But... if the -device- itself is readonly, just skip this.
         * We can't recover this device anyway, so it won't matter.
         */
-       if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
+       if (!xfs_readonly_buftarg(log->l_targ))
                error = xlog_clear_stale_blocks(log, tail_lsn);
 
 done:
-       xlog_put_bp(bp);
+       kmem_free(buffer);
 
        if (error)
                xfs_warn(log->l_mp, "failed to locate log tail");
@@ -1537,7 +1457,7 @@ xlog_find_zeroed(
        struct xlog     *log,
        xfs_daddr_t     *blk_no)
 {
-       xfs_buf_t       *bp;
+       char            *buffer;
        char            *offset;
        uint            first_cycle, last_cycle;
        xfs_daddr_t     new_blk, last_blk, start_blk;
@@ -1547,35 +1467,36 @@ xlog_find_zeroed(
        *blk_no = 0;
 
        /* check totally zeroed log */
-       bp = xlog_get_bp(log, 1);
-       if (!bp)
+       buffer = xlog_alloc_buffer(log, 1);
+       if (!buffer)
                return -ENOMEM;
-       error = xlog_bread(log, 0, 1, bp, &offset);
+       error = xlog_bread(log, 0, 1, buffer, &offset);
        if (error)
-               goto bp_err;
+               goto out_free_buffer;
 
        first_cycle = xlog_get_cycle(offset);
        if (first_cycle == 0) {         /* completely zeroed log */
                *blk_no = 0;
-               xlog_put_bp(bp);
+               kmem_free(buffer);
                return 1;
        }
 
        /* check partially zeroed log */
-       error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
+       error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset);
        if (error)
-               goto bp_err;
+               goto out_free_buffer;
 
        last_cycle = xlog_get_cycle(offset);
        if (last_cycle != 0) {          /* log completely written to */
-               xlog_put_bp(bp);
+               kmem_free(buffer);
                return 0;
        }
 
        /* we have a partially zeroed log */
        last_blk = log_bbnum-1;
-       if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
-               goto bp_err;
+       error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0);
+       if (error)
+               goto out_free_buffer;
 
        /*
         * Validate the answer.  Because there is no way to guarantee that
@@ -1598,7 +1519,7 @@ xlog_find_zeroed(
         */
        if ((error = xlog_find_verify_cycle(log, start_blk,
                                         (int)num_scan_bblks, 0, &new_blk)))
-               goto bp_err;
+               goto out_free_buffer;
        if (new_blk != -1)
                last_blk = new_blk;
 
@@ -1610,11 +1531,11 @@ xlog_find_zeroed(
        if (error == 1)
                error = -EIO;
        if (error)
-               goto bp_err;
+               goto out_free_buffer;
 
        *blk_no = last_blk;
-bp_err:
-       xlog_put_bp(bp);
+out_free_buffer:
+       kmem_free(buffer);
        if (error)
                return error;
        return 1;
@@ -1657,7 +1578,7 @@ xlog_write_log_records(
        int             tail_block)
 {
        char            *offset;
-       xfs_buf_t       *bp;
+       char            *buffer;
        int             balign, ealign;
        int             sectbb = log->l_sectBBsize;
        int             end_block = start_block + blocks;
@@ -1674,7 +1595,7 @@ xlog_write_log_records(
        bufblks = 1 << ffs(blocks);
        while (bufblks > log->l_logBBsize)
                bufblks >>= 1;
-       while (!(bp = xlog_get_bp(log, bufblks))) {
+       while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
                bufblks >>= 1;
                if (bufblks < sectbb)
                        return -ENOMEM;
@@ -1686,9 +1607,9 @@ xlog_write_log_records(
         */
        balign = round_down(start_block, sectbb);
        if (balign != start_block) {
-               error = xlog_bread_noalign(log, start_block, 1, bp);
+               error = xlog_bread_noalign(log, start_block, 1, buffer);
                if (error)
-                       goto out_put_bp;
+                       goto out_free_buffer;
 
                j = start_block - balign;
        }
@@ -1705,29 +1626,28 @@ xlog_write_log_records(
                 */
                ealign = round_down(end_block, sectbb);
                if (j == 0 && (start_block + endcount > ealign)) {
-                       offset = bp->b_addr + BBTOB(ealign - start_block);
-                       error = xlog_bread_offset(log, ealign, sectbb,
-                                                       bp, offset);
+                       error = xlog_bread_noalign(log, ealign, sectbb,
+                                       buffer + BBTOB(ealign - start_block));
                        if (error)
                                break;
 
                }
 
-               offset = xlog_align(log, start_block, endcount, bp);
+               offset = buffer + xlog_align(log, start_block);
                for (; j < endcount; j++) {
                        xlog_add_record(log, offset, cycle, i+j,
                                        tail_cycle, tail_block);
                        offset += BBSIZE;
                }
-               error = xlog_bwrite(log, start_block, endcount, bp);
+               error = xlog_bwrite(log, start_block, endcount, buffer);
                if (error)
                        break;
                start_block += endcount;
                j = 0;
        }
 
- out_put_bp:
-       xlog_put_bp(bp);
+out_free_buffer:
+       kmem_free(buffer);
        return error;
 }
 
@@ -2162,7 +2082,7 @@ xlog_recover_do_inode_buffer(
        if (xfs_sb_version_hascrc(&mp->m_sb))
                bp->b_ops = &xfs_inode_buf_ops;
 
-       inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
+       inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
        for (i = 0; i < inodes_per_buf; i++) {
                next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
                        offsetof(xfs_dinode_t, di_next_unlinked);
@@ -2204,8 +2124,7 @@ xlog_recover_do_inode_buffer(
 
                ASSERT(item->ri_buf[item_index].i_addr != NULL);
                ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
-               ASSERT((reg_buf_offset + reg_buf_bytes) <=
-                                                       BBTOB(bp->b_io_length));
+               ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
 
                /*
                 * The current logged region contains a copy of the
@@ -2670,7 +2589,7 @@ xlog_recover_do_reg_buffer(
                ASSERT(nbits > 0);
                ASSERT(item->ri_buf[i].i_addr != NULL);
                ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
-               ASSERT(BBTOB(bp->b_io_length) >=
+               ASSERT(BBTOB(bp->b_length) >=
                       ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
 
                /*
@@ -2882,23 +2801,22 @@ xlog_recover_buffer_pass2(
         *
         * Also make sure that only inode buffers with good sizes stay in
         * the buffer cache.  The kernel moves inodes in buffers of 1 block
-        * or mp->m_inode_cluster_size bytes, whichever is bigger.  The inode
+        * or inode_cluster_size bytes, whichever is bigger.  The inode
         * buffers in the log can be a different size if the log was generated
         * by an older kernel using unclustered inode buffers or a newer kernel
         * running with a different inode cluster size.  Regardless, if
-        * the inode buffer size isn't max(blocksize, mp->m_inode_cluster_size)
-        * for *our* value of mp->m_inode_cluster_size, then we need to keep
+        * the inode buffer size isn't max(blocksize, inode_cluster_size)
+        * for *our* value of inode_cluster_size, then we need to keep
         * the buffer out of the buffer cache so that the buffer won't
         * overlap with future reads of those inodes.
         */
        if (XFS_DINODE_MAGIC ==
            be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
-           (BBTOB(bp->b_io_length) != max(log->l_mp->m_sb.sb_blocksize,
-                       (uint32_t)log->l_mp->m_inode_cluster_size))) {
+           (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
                xfs_buf_stale(bp);
                error = xfs_bwrite(bp);
        } else {
-               ASSERT(bp->b_target->bt_mount == mp);
+               ASSERT(bp->b_mount == mp);
                bp->b_iodone = xlog_recover_iodone;
                xfs_buf_delwri_queue(bp, buffer_list);
        }
@@ -3260,7 +3178,7 @@ out_owner_change:
        /* re-generate the checksum. */
        xfs_dinode_calc_crc(log->l_mp, dip);
 
-       ASSERT(bp->b_target->bt_mount == mp);
+       ASSERT(bp->b_mount == mp);
        bp->b_iodone = xlog_recover_iodone;
        xfs_buf_delwri_queue(bp, buffer_list);
 
@@ -3399,7 +3317,7 @@ xlog_recover_dquot_pass2(
        }
 
        ASSERT(dq_f->qlf_size == 2);
-       ASSERT(bp->b_target->bt_mount == mp);
+       ASSERT(bp->b_mount == mp);
        bp->b_iodone = xlog_recover_iodone;
        xfs_buf_delwri_queue(bp, buffer_list);
 
@@ -3463,7 +3381,7 @@ xlog_recover_efd_pass2(
 {
        xfs_efd_log_format_t    *efd_formatp;
        xfs_efi_log_item_t      *efip = NULL;
-       xfs_log_item_t          *lip;
+       struct xfs_log_item     *lip;
        uint64_t                efi_id;
        struct xfs_ail_cursor   cur;
        struct xfs_ail          *ailp = log->l_ailp;
@@ -3849,6 +3767,7 @@ xlog_recover_do_icreate_pass2(
 {
        struct xfs_mount        *mp = log->l_mp;
        struct xfs_icreate_log  *icl;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
        xfs_agnumber_t          agno;
        xfs_agblock_t           agbno;
        unsigned int            count;
@@ -3898,10 +3817,10 @@ xlog_recover_do_icreate_pass2(
 
        /*
         * The inode chunk is either full or sparse and we only support
-        * m_ialloc_min_blks sized sparse allocations at this time.
+        * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
         */
-       if (length != mp->m_ialloc_blks &&
-           length != mp->m_ialloc_min_blks) {
+       if (length != igeo->ialloc_blks &&
+           length != igeo->ialloc_min_blks) {
                xfs_warn(log->l_mp,
                         "%s: unsupported chunk length", __FUNCTION__);
                return -EINVAL;
@@ -3921,13 +3840,13 @@ xlog_recover_do_icreate_pass2(
         * buffers for cancellation so we don't overwrite anything written after
         * a cancellation.
         */
-       bb_per_cluster = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
-       nbufs = length / mp->m_blocks_per_cluster;
+       bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
+       nbufs = length / igeo->blocks_per_cluster;
        for (i = 0, cancel_count = 0; i < nbufs; i++) {
                xfs_daddr_t     daddr;
 
                daddr = XFS_AGB_TO_DADDR(mp, agno,
-                                        agbno + i * mp->m_blocks_per_cluster);
+                               agbno + i * igeo->blocks_per_cluster);
                if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
                        cancel_count++;
        }
@@ -4956,12 +4875,11 @@ out:
  * A cancel occurs when the mount has failed and we're bailing out.
  * Release all pending log intent items so they don't pin the AIL.
  */
-STATIC int
+STATIC void
 xlog_recover_cancel_intents(
        struct xlog             *log)
 {
        struct xfs_log_item     *lip;
-       int                     error = 0;
        struct xfs_ail_cursor   cur;
        struct xfs_ail          *ailp;
 
@@ -5001,7 +4919,6 @@ xlog_recover_cancel_intents(
 
        xfs_trans_ail_cursor_done(&cur);
        spin_unlock(&ailp->ail_lock);
-       return error;
 }
 
 /*
@@ -5307,7 +5224,7 @@ xlog_do_recovery_pass(
        xfs_daddr_t             blk_no, rblk_no;
        xfs_daddr_t             rhead_blk;
        char                    *offset;
-       xfs_buf_t               *hbp, *dbp;
+       char                    *hbp, *dbp;
        int                     error = 0, h_size, h_len;
        int                     error2 = 0;
        int                     bblks, split_bblks;
@@ -5332,7 +5249,7 @@ xlog_do_recovery_pass(
                 * iclog header and extract the header size from it.  Get a
                 * new hbp that is the correct size.
                 */
-               hbp = xlog_get_bp(log, 1);
+               hbp = xlog_alloc_buffer(log, 1);
                if (!hbp)
                        return -ENOMEM;
 
@@ -5374,23 +5291,23 @@ xlog_do_recovery_pass(
                        hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
                        if (h_size % XLOG_HEADER_CYCLE_SIZE)
                                hblks++;
-                       xlog_put_bp(hbp);
-                       hbp = xlog_get_bp(log, hblks);
+                       kmem_free(hbp);
+                       hbp = xlog_alloc_buffer(log, hblks);
                } else {
                        hblks = 1;
                }
        } else {
                ASSERT(log->l_sectBBsize == 1);
                hblks = 1;
-               hbp = xlog_get_bp(log, 1);
+               hbp = xlog_alloc_buffer(log, 1);
                h_size = XLOG_BIG_RECORD_BSIZE;
        }
 
        if (!hbp)
                return -ENOMEM;
-       dbp = xlog_get_bp(log, BTOBB(h_size));
+       dbp = xlog_alloc_buffer(log, BTOBB(h_size));
        if (!dbp) {
-               xlog_put_bp(hbp);
+               kmem_free(hbp);
                return -ENOMEM;
        }
 
@@ -5405,7 +5322,7 @@ xlog_do_recovery_pass(
                        /*
                         * Check for header wrapping around physical end-of-log
                         */
-                       offset = hbp->b_addr;
+                       offset = hbp;
                        split_hblks = 0;
                        wrapped_hblks = 0;
                        if (blk_no + hblks <= log->l_logBBsize) {
@@ -5441,8 +5358,8 @@ xlog_do_recovery_pass(
                                 *   - order is important.
                                 */
                                wrapped_hblks = hblks - split_hblks;
-                               error = xlog_bread_offset(log, 0,
-                                               wrapped_hblks, hbp,
+                               error = xlog_bread_noalign(log, 0,
+                                               wrapped_hblks,
                                                offset + BBTOB(split_hblks));
                                if (error)
                                        goto bread_err2;
@@ -5473,7 +5390,7 @@ xlog_do_recovery_pass(
                        } else {
                                /* This log record is split across the
                                 * physical end of log */
-                               offset = dbp->b_addr;
+                               offset = dbp;
                                split_bblks = 0;
                                if (blk_no != log->l_logBBsize) {
                                        /* some data is before the physical
@@ -5502,8 +5419,8 @@ xlog_do_recovery_pass(
                                 *   _first_, then the log start (LR header end)
                                 *   - order is important.
                                 */
-                               error = xlog_bread_offset(log, 0,
-                                               bblks - split_bblks, dbp,
+                               error = xlog_bread_noalign(log, 0,
+                                               bblks - split_bblks,
                                                offset + BBTOB(split_bblks));
                                if (error)
                                        goto bread_err2;
@@ -5551,9 +5468,9 @@ xlog_do_recovery_pass(
        }
 
  bread_err2:
-       xlog_put_bp(dbp);
+       kmem_free(dbp);
  bread_err1:
-       xlog_put_bp(hbp);
+       kmem_free(hbp);
 
        /*
         * Submit buffers that have been added from the last record processed,
@@ -5687,7 +5604,7 @@ xlog_do_recover(
         * Now that we've finished replaying all buffer and inode
         * updates, re-read in the superblock and reverify it.
         */
-       bp = xfs_getsb(mp, 0);
+       bp = xfs_getsb(mp);
        bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
        ASSERT(!(bp->b_flags & XBF_WRITE));
        bp->b_flags |= XBF_READ;
@@ -5860,16 +5777,12 @@ xlog_recover_finish(
        return 0;
 }
 
-int
+void
 xlog_recover_cancel(
        struct xlog     *log)
 {
-       int             error = 0;
-
        if (log->l_flags & XLOG_RECOVERY_NEEDED)
-               error = xlog_recover_cancel_intents(log);
-
-       return error;
+               xlog_recover_cancel_intents(log);
 }
 
 #if defined(DEBUG)
index 6b736ea58d35402eb7e7975067a4303131cf3d83..9804efe525a9314e4b78a52e7c41c1f88296ffd2 100644 (file)
@@ -6,8 +6,8 @@
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_error.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
-#include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 
index 6b2bfe81dc51be79b5b3d736dc3b8f0f459f09ec..322da69092909078fb16222877ddcf7ad9c1a62d 100644 (file)
@@ -12,9 +12,6 @@
 #include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_dir2.h"
 #include "xfs_ialloc.h"
@@ -27,7 +24,6 @@
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_fsops.h"
-#include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_sysfs.h"
 #include "xfs_rmap_btree.h"
@@ -429,30 +425,6 @@ xfs_update_alignment(xfs_mount_t *mp)
        return 0;
 }
 
-/*
- * Set the maximum inode count for this filesystem
- */
-STATIC void
-xfs_set_maxicount(xfs_mount_t *mp)
-{
-       xfs_sb_t        *sbp = &(mp->m_sb);
-       uint64_t        icount;
-
-       if (sbp->sb_imax_pct) {
-               /*
-                * Make sure the maximum inode count is a multiple
-                * of the units we allocate inodes in.
-                */
-               icount = sbp->sb_dblocks * sbp->sb_imax_pct;
-               do_div(icount, 100);
-               do_div(icount, mp->m_ialloc_blks);
-               mp->m_maxicount = (icount * mp->m_ialloc_blks)  <<
-                                  sbp->sb_inopblog;
-       } else {
-               mp->m_maxicount = 0;
-       }
-}
-
 /*
  * Set the default minimum read and write sizes unless
  * already specified in a mount option.
@@ -509,29 +481,6 @@ xfs_set_low_space_thresholds(
        }
 }
 
-
-/*
- * Set whether we're using inode alignment.
- */
-STATIC void
-xfs_set_inoalignment(xfs_mount_t *mp)
-{
-       if (xfs_sb_version_hasalign(&mp->m_sb) &&
-               mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
-               mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
-       else
-               mp->m_inoalign_mask = 0;
-       /*
-        * If we are using stripe alignment, check whether
-        * the stripe unit is a multiple of the inode alignment
-        */
-       if (mp->m_dalign && mp->m_inoalign_mask &&
-           !(mp->m_dalign & mp->m_inoalign_mask))
-               mp->m_sinoalign = mp->m_dalign;
-       else
-               mp->m_sinoalign = 0;
-}
-
 /*
  * Check that the data (and log if separate) is an ok size.
  */
@@ -683,6 +632,7 @@ xfs_mountfs(
 {
        struct xfs_sb           *sbp = &(mp->m_sb);
        struct xfs_inode        *rip;
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
        uint64_t                resblks;
        uint                    quotamount = 0;
        uint                    quotaflags = 0;
@@ -749,12 +699,10 @@ xfs_mountfs(
        xfs_alloc_compute_maxlevels(mp);
        xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
        xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
-       xfs_ialloc_compute_maxlevels(mp);
+       xfs_ialloc_setup_geometry(mp);
        xfs_rmapbt_compute_maxlevels(mp);
        xfs_refcountbt_compute_maxlevels(mp);
 
-       xfs_set_maxicount(mp);
-
        /* enable fail_at_unmount as default */
        mp->m_fail_unmount = true;
 
@@ -787,29 +735,6 @@ xfs_mountfs(
        /* set the low space thresholds for dynamic preallocation */
        xfs_set_low_space_thresholds(mp);
 
-       /*
-        * Set the inode cluster size.
-        * This may still be overridden by the file system
-        * block size if it is larger than the chosen cluster size.
-        *
-        * For v5 filesystems, scale the cluster size with the inode size to
-        * keep a constant ratio of inode per cluster buffer, but only if mkfs
-        * has set the inode alignment value appropriately for larger cluster
-        * sizes.
-        */
-       mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               int     new_size = mp->m_inode_cluster_size;
-
-               new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
-               if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
-                       mp->m_inode_cluster_size = new_size;
-       }
-       mp->m_blocks_per_cluster = xfs_icluster_size_fsb(mp);
-       mp->m_inodes_per_cluster = XFS_FSB_TO_INO(mp, mp->m_blocks_per_cluster);
-       mp->m_cluster_align = xfs_ialloc_cluster_alignment(mp);
-       mp->m_cluster_align_inodes = XFS_FSB_TO_INO(mp, mp->m_cluster_align);
-
        /*
         * If enabled, sparse inode chunk alignment is expected to match the
         * cluster size. Full inode chunk alignment must match the chunk size,
@@ -817,20 +742,15 @@ xfs_mountfs(
         */
        if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
            mp->m_sb.sb_spino_align !=
-                       XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
+                       XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
                xfs_warn(mp,
        "Sparse inode block alignment (%u) must match cluster size (%llu).",
                         mp->m_sb.sb_spino_align,
-                        XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
+                        XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
                error = -EINVAL;
                goto out_remove_uuid;
        }
 
-       /*
-        * Set inode alignment fields
-        */
-       xfs_set_inoalignment(mp);
-
        /*
         * Check that the data (and log if separate) is an ok size.
         */
@@ -1385,24 +1305,14 @@ xfs_mod_frextents(
  * xfs_getsb() is called to obtain the buffer for the superblock.
  * The buffer is returned locked and read in from disk.
  * The buffer should be released with a call to xfs_brelse().
- *
- * If the flags parameter is BUF_TRYLOCK, then we'll only return
- * the superblock buffer if it can be locked without sleeping.
- * If it can't then we'll return NULL.
  */
 struct xfs_buf *
 xfs_getsb(
-       struct xfs_mount        *mp,
-       int                     flags)
+       struct xfs_mount        *mp)
 {
        struct xfs_buf          *bp = mp->m_sb_bp;
 
-       if (!xfs_buf_trylock(bp)) {
-               if (flags & XBF_TRYLOCK)
-                       return NULL;
-               xfs_buf_lock(bp);
-       }
-
+       xfs_buf_lock(bp);
        xfs_buf_hold(bp);
        ASSERT(bp->b_flags & XBF_DONE);
        return bp;
index c81a5cd7c2288014da00c994d58f9d9fa2caede3..4adb6837439ac38fa600f0bd1f373634cef5bb78 100644 (file)
@@ -105,6 +105,7 @@ typedef struct xfs_mount {
        struct xfs_da_geometry  *m_dir_geo;     /* directory block geometry */
        struct xfs_da_geometry  *m_attr_geo;    /* attribute block geometry */
        struct xlog             *m_log;         /* log specific stuff */
+       struct xfs_ino_geometry m_ino_geo;      /* inode geometry */
        int                     m_logbufs;      /* number of log buffers */
        int                     m_logbsize;     /* size of each log buffer */
        uint                    m_rsumlevels;   /* rt summary levels */
@@ -126,12 +127,6 @@ typedef struct xfs_mount {
        uint8_t                 m_blkbit_log;   /* blocklog + NBBY */
        uint8_t                 m_blkbb_log;    /* blocklog - BBSHIFT */
        uint8_t                 m_agno_log;     /* log #ag's */
-       uint8_t                 m_agino_log;    /* #bits for agino in inum */
-       uint                    m_inode_cluster_size;/* min inode buf size */
-       unsigned int            m_inodes_per_cluster;
-       unsigned int            m_blocks_per_cluster;
-       unsigned int            m_cluster_align;
-       unsigned int            m_cluster_align_inodes;
        uint                    m_blockmask;    /* sb_blocksize-1 */
        uint                    m_blockwsize;   /* sb_blocksize in words */
        uint                    m_blockwmask;   /* blockwsize-1 */
@@ -139,15 +134,12 @@ typedef struct xfs_mount {
        uint                    m_alloc_mnr[2]; /* min alloc btree records */
        uint                    m_bmap_dmxr[2]; /* max bmap btree records */
        uint                    m_bmap_dmnr[2]; /* min bmap btree records */
-       uint                    m_inobt_mxr[2]; /* max inobt btree records */
-       uint                    m_inobt_mnr[2]; /* min inobt btree records */
        uint                    m_rmap_mxr[2];  /* max rmap btree records */
        uint                    m_rmap_mnr[2];  /* min rmap btree records */
        uint                    m_refc_mxr[2];  /* max refc btree records */
        uint                    m_refc_mnr[2];  /* min refc btree records */
        uint                    m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
        uint                    m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
-       uint                    m_in_maxlevels; /* max inobt btree levels. */
        uint                    m_rmap_maxlevels; /* max rmap btree levels */
        uint                    m_refc_maxlevels; /* max refcount btree level */
        xfs_extlen_t            m_ag_prealloc_blocks; /* reserved ag blocks */
@@ -159,20 +151,13 @@ typedef struct xfs_mount {
        int                     m_fixedfsid[2]; /* unchanged for life of FS */
        uint64_t                m_flags;        /* global mount flags */
        bool                    m_finobt_nores; /* no per-AG finobt resv. */
-       int                     m_ialloc_inos;  /* inodes in inode allocation */
-       int                     m_ialloc_blks;  /* blocks in inode allocation */
-       int                     m_ialloc_min_blks;/* min blocks in sparse inode
-                                                  * allocation */
-       int                     m_inoalign_mask;/* mask sb_inoalignmt if used */
        uint                    m_qflags;       /* quota status flags */
        struct xfs_trans_resv   m_resv;         /* precomputed res values */
-       uint64_t                m_maxicount;    /* maximum inode count */
        uint64_t                m_resblks;      /* total reserved blocks */
        uint64_t                m_resblks_avail;/* available reserved blocks */
        uint64_t                m_resblks_save; /* reserved blks @ remount,ro */
        int                     m_dalign;       /* stripe unit */
        int                     m_swidth;       /* stripe width */
-       int                     m_sinoalign;    /* stripe unit inode alignment */
        uint8_t                 m_sectbb_log;   /* sectlog - BBSHIFT */
        const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
        const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
@@ -198,7 +183,6 @@ typedef struct xfs_mount {
        struct workqueue_struct *m_unwritten_workqueue;
        struct workqueue_struct *m_cil_workqueue;
        struct workqueue_struct *m_reclaim_workqueue;
-       struct workqueue_struct *m_log_workqueue;
        struct workqueue_struct *m_eofblocks_workqueue;
        struct workqueue_struct *m_sync_workqueue;
 
@@ -226,6 +210,8 @@ typedef struct xfs_mount {
 #endif
 } xfs_mount_t;
 
+#define M_IGEO(mp)             (&(mp)->m_ino_geo)
+
 /*
  * Flags for m_flags.
  */
@@ -465,7 +451,7 @@ extern int  xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
                                 bool reserved);
 extern int     xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
 
-extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
+extern struct xfs_buf *xfs_getsb(xfs_mount_t *);
 extern int     xfs_readsb(xfs_mount_t *, int);
 extern void    xfs_freesb(xfs_mount_t *);
 extern bool    xfs_fs_writable(struct xfs_mount *mp, int level);
index c8ba98fae30aefa7013ebfa168fed652a955f3e7..b6701b4f59a9b5bc44cd66e69a4eb7d5b69a547b 100644 (file)
@@ -146,6 +146,11 @@ xfs_check_ondisk_structs(void)
        XFS_CHECK_OFFSET(struct xfs_dir3_data_hdr, hdr.magic,   0);
        XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic,   0);
        XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0);
+
+       XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat,              192);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers,              24);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req,          64);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req,          64);
 }
 
 #endif /* __XFS_ONDISK_H */
index bde2c9f56a46ab883fdfd5cb932d958838c867eb..0c954cad74493cf6fb9002b93101baeb7717e3f3 100644 (file)
@@ -2,23 +2,16 @@
 /*
  * Copyright (c) 2014 Christoph Hellwig.
  */
-#include <linux/iomap.h>
 #include "xfs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_log.h"
 #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_error.h"
 #include "xfs_iomap.h"
-#include "xfs_shared.h"
-#include "xfs_bit.h"
-#include "xfs_pnfs.h"
 
 /*
  * Ensure that we do not have any outstanding pNFS layouts that can be used by
diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c
new file mode 100644 (file)
index 0000000..4bcc3e6
--- /dev/null
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_trace.h"
+#include "xfs_sysctl.h"
+#include "xfs_pwork.h"
+#include <linux/nmi.h>
+
+/*
+ * Parallel Work Queue
+ * ===================
+ *
+ * Abstract away the details of running a large and "obviously" parallelizable
+ * task across multiple CPUs.  Callers initialize the pwork control object with
+ * a desired level of parallelization and a work function.  Next, they embed
+ * struct xfs_pwork in whatever structure they use to pass work context to a
+ * worker thread and queue that pwork.  The work function will be passed the
+ * pwork item when it is run (from process context) and any returned error will
+ * be recorded in xfs_pwork_ctl.error.  Work functions should check for errors
+ * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not
+ * stop workqueue item processing.
+ *
+ * This is the rough equivalent of the xfsprogs workqueue code, though we can't
+ * reuse that name here.
+ */
+
+/* Invoke our caller's function, record any error, wake waiters when done. */
+static void
+xfs_pwork_work(
+       struct work_struct      *work)
+{
+       struct xfs_pwork        *pwork;
+       struct xfs_pwork_ctl    *pctl;
+       int                     error;
+
+       pwork = container_of(work, struct xfs_pwork, work);
+       pctl = pwork->pctl;
+       error = pctl->work_fn(pctl->mp, pwork);
+       if (error && !pctl->error)      /* don't overwrite an earlier error */
+               pctl->error = error;
+       if (atomic_dec_and_test(&pctl->nr_work))        /* no work outstanding */
+               wake_up(&pctl->poll_wait);
+}
+
+/*
+ * Set up control data for parallel work.  @work_fn is run for each queued
+ * item; @tag plus the caller's pid names the workqueue; @nr_threads is the
+ * level of parallelism desired, or 0 for no limit.  Returns 0 or -ENOMEM.
+ */
+int
+xfs_pwork_init(
+       struct xfs_mount        *mp,
+       struct xfs_pwork_ctl    *pctl,
+       xfs_pwork_work_fn       work_fn,
+       const char              *tag,
+       unsigned int            nr_threads)
+{
+#ifdef DEBUG
+       if (xfs_globals.pwork_threads >= 0)     /* debug knob overrides caller */
+               nr_threads = xfs_globals.pwork_threads;
+#endif
+       trace_xfs_pwork_init(mp, nr_threads, current->pid);
+
+       pctl->wq = alloc_workqueue("%s-%d", WQ_FREEZABLE, nr_threads, tag,
+                       current->pid);
+       if (!pctl->wq)
+               return -ENOMEM;
+       pctl->work_fn = work_fn;
+       pctl->error = 0;
+       pctl->mp = mp;
+       atomic_set(&pctl->nr_work, 0);
+       init_waitqueue_head(&pctl->poll_wait);
+
+       return 0;
+}
+
+/* Queue @pwork on @pctl's workqueue and count it as outstanding work. */
+void
+xfs_pwork_queue(
+       struct xfs_pwork_ctl    *pctl,
+       struct xfs_pwork        *pwork)
+{
+       INIT_WORK(&pwork->work, xfs_pwork_work);
+       pwork->pctl = pctl;
+       atomic_inc(&pctl->nr_work);
+       queue_work(pctl->wq, &pwork->work);
+}
+
+/* Wait for the work to finish, tear down the wq, and return pctl->error. */
+int
+xfs_pwork_destroy(
+       struct xfs_pwork_ctl    *pctl)
+{
+       destroy_workqueue(pctl->wq);
+       pctl->wq = NULL;
+       return pctl->error;
+}
+
+/*
+ * Wait until nr_work drops to zero, touching the soft lockup watchdog each
+ * time a HZ-long wait times out.  For callers such as mount which hold locks.
+ */
+void
+xfs_pwork_poll(
+       struct xfs_pwork_ctl    *pctl)
+{
+       while (wait_event_timeout(pctl->poll_wait,
+                               atomic_read(&pctl->nr_work) == 0, HZ) == 0)
+               touch_softlockup_watchdog();
+}
+
+/*
+ * Return the amount of parallelism that the data device can handle, or 0 for
+ * no limit.  (The current heuristic below only ever returns 1 or 2.)
+ */
+unsigned int
+xfs_pwork_guess_datadev_parallelism(
+       struct xfs_mount        *mp)
+{
+       struct xfs_buftarg      *btp = mp->m_ddev_targp;
+
+       /*
+        * For now we'll go with the most conservative setting possible,
+        * which is two threads for an SSD and 1 thread everywhere else.
+        */
+       return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1;
+}
diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h
new file mode 100644 (file)
index 0000000..8133124
--- /dev/null
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_PWORK_H__
+#define __XFS_PWORK_H__
+
+struct xfs_pwork;
+struct xfs_mount;
+
+typedef int (*xfs_pwork_work_fn)(struct xfs_mount *mp, struct xfs_pwork *pwork);
+
+/*
+ * Parallel work coordination structure.
+ */
+struct xfs_pwork_ctl {
+       struct workqueue_struct *wq;            /* runs queued pwork items */
+       struct xfs_mount        *mp;            /* passed to work_fn */
+       xfs_pwork_work_fn       work_fn;        /* called for each item */
+       struct wait_queue_head  poll_wait;      /* woken when nr_work hits 0 */
+       atomic_t                nr_work;        /* items queued, not yet done */
+       int                     error;          /* error from work_fn, or 0 */
+};
+
+/*
+ * Embed this parallel work control item inside your own work structure,
+ * then queue work with it.
+ */
+struct xfs_pwork {
+       struct work_struct      work;   /* handed to the workqueue */
+       struct xfs_pwork_ctl    *pctl;  /* set by xfs_pwork_queue() */
+};
+
+#define XFS_PWORK_SINGLE_THREADED      { .pctl = NULL }
+
+/* Has an error been recorded in @pctl?  A NULL @pctl never wants an abort. */
+static inline bool
+xfs_pwork_ctl_want_abort(
+       struct xfs_pwork_ctl    *pctl)
+{
+       return pctl && pctl->error;
+}
+
+/* Has the control structure that @pwork belongs to recorded an error? */
+static inline bool
+xfs_pwork_want_abort(
+       struct xfs_pwork        *pwork)
+{
+       return xfs_pwork_ctl_want_abort(pwork->pctl);
+}
+
+int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl,
+               xfs_pwork_work_fn work_fn, const char *tag,
+               unsigned int nr_threads);
+void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork);
+int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl);
+void xfs_pwork_poll(struct xfs_pwork_ctl *pctl);
+unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp);
+
+#endif /* __XFS_PWORK_H__ */
index aa6b6db3db0ec953096bf9fb6ae166d9cc392238..5e7a37f0cf84856663e93e40a605c9c169f0ddb3 100644 (file)
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
+#include "xfs_iwalk.h"
 #include "xfs_quota.h"
-#include "xfs_error.h"
 #include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_bmap_util.h"
 #include "xfs_trans.h"
 #include "xfs_trans_space.h"
 #include "xfs_qm.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
-#include "xfs_cksum.h"
 
 /*
  * The global quota manager. There is only one of these for the entire
@@ -1118,17 +1114,15 @@ xfs_qm_quotacheck_dqadjust(
 /* ARGSUSED */
 STATIC int
 xfs_qm_dqusage_adjust(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* not used */
-       int             ubsize,         /* not used */
-       int             *ubused,        /* not used */
-       int             *res)           /* result code value */
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_ino_t               ino,
+       void                    *data)
 {
-       xfs_inode_t     *ip;
-       xfs_qcnt_t      nblks;
-       xfs_filblks_t   rtblks = 0;     /* total rt blks */
-       int             error;
+       struct xfs_inode        *ip;
+       xfs_qcnt_t              nblks;
+       xfs_filblks_t           rtblks = 0;     /* total rt blks */
+       int                     error;
 
        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
@@ -1136,20 +1130,18 @@ xfs_qm_dqusage_adjust(
         * rootino must have its resources accounted for, not so with the quota
         * inodes.
         */
-       if (xfs_is_quota_inode(&mp->m_sb, ino)) {
-               *res = BULKSTAT_RV_NOTHING;
-               return -EINVAL;
-       }
+       if (xfs_is_quota_inode(&mp->m_sb, ino))
+               return 0;
 
        /*
         * We don't _need_ to take the ilock EXCL here because quotacheck runs
         * at mount time and therefore nobody will be racing chown/chproj.
         */
-       error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, 0, &ip);
-       if (error) {
-               *res = BULKSTAT_RV_NOTHING;
+       error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip);
+       if (error == -EINVAL || error == -ENOENT)
+               return 0;
+       if (error)
                return error;
-       }
 
        ASSERT(ip->i_delayed_blks == 0);
 
@@ -1157,7 +1149,7 @@ xfs_qm_dqusage_adjust(
                struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
 
                if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-                       error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+                       error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
                        if (error)
                                goto error0;
                }
@@ -1200,13 +1192,8 @@ xfs_qm_dqusage_adjust(
                        goto error0;
        }
 
-       xfs_irele(ip);
-       *res = BULKSTAT_RV_DIDONE;
-       return 0;
-
 error0:
        xfs_irele(ip);
-       *res = BULKSTAT_RV_GIVEUP;
        return error;
 }
 
@@ -1270,18 +1257,13 @@ STATIC int
 xfs_qm_quotacheck(
        xfs_mount_t     *mp)
 {
-       int                     done, count, error, error2;
-       xfs_ino_t               lastino;
-       size_t                  structsz;
+       int                     error, error2;
        uint                    flags;
        LIST_HEAD               (buffer_list);
        struct xfs_inode        *uip = mp->m_quotainfo->qi_uquotaip;
        struct xfs_inode        *gip = mp->m_quotainfo->qi_gquotaip;
        struct xfs_inode        *pip = mp->m_quotainfo->qi_pquotaip;
 
-       count = INT_MAX;
-       structsz = 1;
-       lastino = 0;
        flags = 0;
 
        ASSERT(uip || gip || pip);
@@ -1318,18 +1300,10 @@ xfs_qm_quotacheck(
                flags |= XFS_PQUOTA_CHKD;
        }
 
-       do {
-               /*
-                * Iterate thru all the inodes in the file system,
-                * adjusting the corresponding dquot counters in core.
-                */
-               error = xfs_bulkstat(mp, &lastino, &count,
-                                    xfs_qm_dqusage_adjust,
-                                    structsz, NULL, &done);
-               if (error)
-                       break;
-
-       } while (!done);
+       error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
+                       NULL);
+       if (error)
+               goto error_return;
 
        /*
         * We've made all the changes that we need to make incore.  Flush them
index 3091e4bc04efe1e6f4d9aa88ed7987a221f9bd78..5d72e88598b41a16316b09f0c50819b48ad4871b 100644 (file)
@@ -5,13 +5,13 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
 #include "xfs_inode.h"
-#include "xfs_error.h"
 #include "xfs_trans.h"
 #include "xfs_qm.h"
 
index b3190890f096d5f8717f8762e53be74edfdaa9d9..da7ad0383037bfb5994393c1f595bad55b1b6c0b 100644 (file)
@@ -4,7 +4,6 @@
  * All Rights Reserved.
  */
 
-#include <linux/capability.h>
 
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
-#include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_qm.h"
-#include "xfs_trace.h"
 #include "xfs_icache.h"
-#include "xfs_defer.h"
 
 STATIC int     xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
 STATIC int     xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
index a7c0c657dfaf943037bcb6fa753996c32956413b..cd6c7210a37366a9e144f85f5e042cea4aa20968 100644 (file)
@@ -4,6 +4,7 @@
  * All Rights Reserved.
  */
 #include "xfs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_inode.h"
 #include "xfs_quota.h"
 #include "xfs_trans.h"
-#include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_qm.h"
-#include <linux/quota.h>
 
 
 static void
index fce38b56b962cf07ea5ce82b205db30dfe7dc2c8..d8288aa0670ad61a5353452f0aaabae8663f4a4f 100644 (file)
@@ -14,7 +14,6 @@
 #include "xfs_defer.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
 #include "xfs_refcount_item.h"
 #include "xfs_log.h"
 #include "xfs_refcount.h"
@@ -94,15 +93,6 @@ xfs_cui_item_format(
                        xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
 }
 
-/*
- * Pinning has no meaning for an cui item, so just return.
- */
-STATIC void
-xfs_cui_item_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
 /*
  * The unpin operation is the last place an CUI is manipulated in the log. It is
  * either inserted in the AIL or aborted in the event of a log I/O error. In
@@ -121,72 +111,23 @@ xfs_cui_item_unpin(
        xfs_cui_release(cuip);
 }
 
-/*
- * CUI items have no locking or pushing.  However, since CUIs are pulled from
- * the AIL when their corresponding CUDs are committed to disk, their situation
- * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log.  This should help in getting the CUI out of
- * the AIL.
- */
-STATIC uint
-xfs_cui_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
-{
-       return XFS_ITEM_PINNED;
-}
-
 /*
  * The CUI has been either committed or aborted if the transaction has been
  * cancelled. If the transaction was cancelled, an CUD isn't going to be
  * constructed and thus we free the CUI here directly.
  */
 STATIC void
-xfs_cui_item_unlock(
+xfs_cui_item_release(
        struct xfs_log_item     *lip)
 {
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
-               xfs_cui_release(CUI_ITEM(lip));
+       xfs_cui_release(CUI_ITEM(lip));
 }
 
-/*
- * The CUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_cui_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
-}
-
-/*
- * The CUI dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_cui_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all cui log items.
- */
 static const struct xfs_item_ops xfs_cui_item_ops = {
        .iop_size       = xfs_cui_item_size,
        .iop_format     = xfs_cui_item_format,
-       .iop_pin        = xfs_cui_item_pin,
        .iop_unpin      = xfs_cui_item_unpin,
-       .iop_unlock     = xfs_cui_item_unlock,
-       .iop_committed  = xfs_cui_item_committed,
-       .iop_push       = xfs_cui_item_push,
-       .iop_committing = xfs_cui_item_committing,
+       .iop_release    = xfs_cui_item_release,
 };
 
 /*
@@ -254,126 +195,250 @@ xfs_cud_item_format(
 }
 
 /*
- * Pinning has no meaning for an cud item, so just return.
+ * The CUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the CUI and free the
+ * CUD.
  */
 STATIC void
-xfs_cud_item_pin(
+xfs_cud_item_release(
        struct xfs_log_item     *lip)
 {
+       struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+
+       xfs_cui_release(cudp->cud_cuip);
+       kmem_zone_free(xfs_cud_zone, cudp);
 }
 
-/*
- * Since pinning has no meaning for an cud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_cud_item_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
+static const struct xfs_item_ops xfs_cud_item_ops = {
+       .flags          = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+       .iop_size       = xfs_cud_item_size,
+       .iop_format     = xfs_cud_item_format,
+       .iop_release    = xfs_cud_item_release,
+};
+
+static struct xfs_cud_log_item *
+xfs_trans_get_cud(
+       struct xfs_trans                *tp,
+       struct xfs_cui_log_item         *cuip)
 {
+       struct xfs_cud_log_item         *cudp;
+
+       cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
+       xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD,
+                         &xfs_cud_item_ops);
+       cudp->cud_cuip = cuip;
+       cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
+
+       xfs_trans_add_item(tp, &cudp->cud_item);
+       return cudp;
 }
 
 /*
- * There isn't much you can do to push on an cud item.  It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Finish a refcount update and log it to the CUD. Note that the
+ * transaction is marked dirty regardless of whether the refcount
+ * update succeeds or fails to support the CUI/CUD lifecycle rules.
  */
-STATIC uint
-xfs_cud_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
+static int
+xfs_trans_log_finish_refcount_update(
+       struct xfs_trans                *tp,
+       struct xfs_cud_log_item         *cudp,
+       enum xfs_refcount_intent_type   type,
+       xfs_fsblock_t                   startblock,
+       xfs_extlen_t                    blockcount,
+       xfs_fsblock_t                   *new_fsb,
+       xfs_extlen_t                    *new_len,
+       struct xfs_btree_cur            **pcur)
 {
-       return XFS_ITEM_PINNED;
+       int                             error;
+
+       error = xfs_refcount_finish_one(tp, type, startblock,
+                       blockcount, new_fsb, new_len, pcur);
+
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the CUI and frees the CUD
+        * 2.) shuts down the filesystem
+        */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
+
+       return error;
 }
 
-/*
- * The CUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the CUI and free the
- * CUD.
- */
-STATIC void
-xfs_cud_item_unlock(
-       struct xfs_log_item     *lip)
+/* Sort refcount intents by AG. */
+static int
+xfs_refcount_update_diff_items(
+       void                            *priv,
+       struct list_head                *a,
+       struct list_head                *b)
 {
-       struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+       struct xfs_mount                *mp = priv;
+       struct xfs_refcount_intent      *ra;
+       struct xfs_refcount_intent      *rb;
+
+       ra = container_of(a, struct xfs_refcount_intent, ri_list);
+       rb = container_of(b, struct xfs_refcount_intent, ri_list);
+       return  XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
+               XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
+}
 
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
-               xfs_cui_release(cudp->cud_cuip);
-               kmem_zone_free(xfs_cud_zone, cudp);
+/* Get a CUI. */
+STATIC void *
+xfs_refcount_update_create_intent(
+       struct xfs_trans                *tp,
+       unsigned int                    count)
+{
+       struct xfs_cui_log_item         *cuip;
+
+       ASSERT(tp != NULL);
+       ASSERT(count > 0);
+
+       cuip = xfs_cui_init(tp->t_mountp, count);
+       ASSERT(cuip != NULL);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &cuip->cui_item);
+       return cuip;
+}
+
+/* Set the phys extent flags for this refcount update. */
+static void
+xfs_trans_set_refcount_flags(
+       struct xfs_phys_extent          *refc,
+       enum xfs_refcount_intent_type   type)
+{
+       refc->pe_flags = 0;
+       switch (type) {
+       case XFS_REFCOUNT_INCREASE:
+       case XFS_REFCOUNT_DECREASE:
+       case XFS_REFCOUNT_ALLOC_COW:
+       case XFS_REFCOUNT_FREE_COW:
+               refc->pe_flags |= type;
+               break;
+       default:
+               ASSERT(0);
        }
 }
 
-/*
- * When the cud item is committed to disk, all we need to do is delete our
- * reference to our partner cui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_cud_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+/* Log refcount updates in the intent item. */
+STATIC void
+xfs_refcount_update_log_item(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       struct list_head                *item)
 {
-       struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+       struct xfs_cui_log_item         *cuip = intent;
+       struct xfs_refcount_intent      *refc;
+       uint                            next_extent;
+       struct xfs_phys_extent          *ext;
+
+       refc = container_of(item, struct xfs_refcount_intent, ri_list);
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
 
        /*
-        * Drop the CUI reference regardless of whether the CUD has been
-        * aborted. Once the CUD transaction is constructed, it is the sole
-        * responsibility of the CUD to release the CUI (even if the CUI is
-        * aborted due to log I/O error).
+        * atomic_inc_return gives us the value after the increment;
+        * we want to use it as an array index so we need to subtract 1 from
+        * it.
         */
-       xfs_cui_release(cudp->cud_cuip);
-       kmem_zone_free(xfs_cud_zone, cudp);
+       next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
+       ASSERT(next_extent < cuip->cui_format.cui_nextents);
+       ext = &cuip->cui_format.cui_extents[next_extent];
+       ext->pe_startblock = refc->ri_startblock;
+       ext->pe_len = refc->ri_blockcount;
+       xfs_trans_set_refcount_flags(ext, refc->ri_type);
+}
 
-       return (xfs_lsn_t)-1;
+/* Get a CUD so we can process all the deferred refcount updates. */
+STATIC void *
+xfs_refcount_update_create_done(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       unsigned int                    count)
+{
+       return xfs_trans_get_cud(tp, intent);
 }
 
-/*
- * The CUD dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_cud_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+/* Process a deferred refcount update. */
+STATIC int
+xfs_refcount_update_finish_item(
+       struct xfs_trans                *tp,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
 {
+       struct xfs_refcount_intent      *refc;
+       xfs_fsblock_t                   new_fsb;
+       xfs_extlen_t                    new_aglen;
+       int                             error;
+
+       refc = container_of(item, struct xfs_refcount_intent, ri_list);
+       error = xfs_trans_log_finish_refcount_update(tp, done_item,
+                       refc->ri_type,
+                       refc->ri_startblock,
+                       refc->ri_blockcount,
+                       &new_fsb, &new_aglen,
+                       (struct xfs_btree_cur **)state);
+       /* Did we run out of reservation?  Requeue what we didn't finish. */
+       if (!error && new_aglen > 0) {
+               ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
+                      refc->ri_type == XFS_REFCOUNT_DECREASE);
+               refc->ri_startblock = new_fsb;
+               refc->ri_blockcount = new_aglen;
+               return -EAGAIN;
+       }
+       kmem_free(refc);
+       return error;
 }
 
-/*
- * This is the ops vector shared by all cud log items.
- */
-static const struct xfs_item_ops xfs_cud_item_ops = {
-       .iop_size       = xfs_cud_item_size,
-       .iop_format     = xfs_cud_item_format,
-       .iop_pin        = xfs_cud_item_pin,
-       .iop_unpin      = xfs_cud_item_unpin,
-       .iop_unlock     = xfs_cud_item_unlock,
-       .iop_committed  = xfs_cud_item_committed,
-       .iop_push       = xfs_cud_item_push,
-       .iop_committing = xfs_cud_item_committing,
-};
+/* Clean up after processing deferred refcounts. */
+STATIC void
+xfs_refcount_update_finish_cleanup(
+       struct xfs_trans        *tp,
+       void                    *state,
+       int                     error)
+{
+       struct xfs_btree_cur    *rcur = state;
 
-/*
- * Allocate and initialize an cud item with the given number of extents.
- */
-struct xfs_cud_log_item *
-xfs_cud_init(
-       struct xfs_mount                *mp,
-       struct xfs_cui_log_item         *cuip)
+       xfs_refcount_finish_one_cleanup(tp, rcur, error);
+}
 
+/* Abort a pending CUI by releasing it. */
+STATIC void
+xfs_refcount_update_abort_intent(
+       void                            *intent)
 {
-       struct xfs_cud_log_item *cudp;
+       xfs_cui_release(intent);
+}
 
-       cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
-       xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops);
-       cudp->cud_cuip = cuip;
-       cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
+/* Cancel a deferred refcount update. */
+STATIC void
+xfs_refcount_update_cancel_item(
+       struct list_head                *item)
+{
+       struct xfs_refcount_intent      *refc;
 
-       return cudp;
+       refc = container_of(item, struct xfs_refcount_intent, ri_list);
+       kmem_free(refc);
 }
 
+const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
+       .max_items      = XFS_CUI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_refcount_update_diff_items,
+       .create_intent  = xfs_refcount_update_create_intent,
+       .abort_intent   = xfs_refcount_update_abort_intent,
+       .log_item       = xfs_refcount_update_log_item,
+       .create_done    = xfs_refcount_update_create_done,
+       .finish_item    = xfs_refcount_update_finish_item,
+       .finish_cleanup = xfs_refcount_update_finish_cleanup,
+       .cancel_item    = xfs_refcount_update_cancel_item,
+};
+
 /*
  * Process a refcount update intent item that was recovered from the log.
  * We need to update the refcountbt.
index 3896dcc2368f8e19b206ef49abc6bab0f12852b5..e47530f30489deb04dcacba6aacf98c90e813b6e 100644 (file)
@@ -78,8 +78,6 @@ extern struct kmem_zone       *xfs_cui_zone;
 extern struct kmem_zone        *xfs_cud_zone;
 
 struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint);
-struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *,
-               struct xfs_cui_log_item *);
 void xfs_cui_item_free(struct xfs_cui_log_item *);
 void xfs_cui_release(struct xfs_cui_log_item *);
 int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
index 680ae7662a78ef260fd4897b244b69898a239c5a..c4ec7afd1170a7550df8704b2bb71ae450c927f5 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
-#include "xfs_error.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-#include "xfs_ioctl.h"
 #include "xfs_trace.h"
-#include "xfs_log.h"
 #include "xfs_icache.h"
-#include "xfs_pnfs.h"
 #include "xfs_btree.h"
 #include "xfs_refcount_btree.h"
 #include "xfs_refcount.h"
 #include "xfs_trans_space.h"
 #include "xfs_bit.h"
 #include "xfs_alloc.h"
-#include "xfs_quota_defs.h"
 #include "xfs_quota.h"
 #include "xfs_reflink.h"
 #include "xfs_iomap.h"
-#include "xfs_rmap_btree.h"
 #include "xfs_sb.h"
 #include "xfs_ag_resv.h"
 
@@ -572,7 +561,7 @@ xfs_reflink_cancel_cow_range(
 
        /* Start a rolling transaction to remove the mappings */
        error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-                       0, 0, XFS_TRANS_NOFS, &tp);
+                       0, 0, 0, &tp);
        if (error)
                goto out;
 
@@ -631,7 +620,7 @@ xfs_reflink_end_cow_extent(
 
        resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
-                       XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
+                       XFS_TRANS_RESERVE, &tp);
        if (error)
                return error;
 
index 127dc9c32a54247be2b24ec56de1f2d91e82a6c4..77ed557b6127c6c23c00ea285ad9ce3fa0721dcd 100644 (file)
@@ -14,7 +14,6 @@
 #include "xfs_defer.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
 #include "xfs_rmap_item.h"
 #include "xfs_log.h"
 #include "xfs_rmap.h"
@@ -93,15 +92,6 @@ xfs_rui_item_format(
                        xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents));
 }
 
-/*
- * Pinning has no meaning for an rui item, so just return.
- */
-STATIC void
-xfs_rui_item_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
 /*
  * The unpin operation is the last place an RUI is manipulated in the log. It is
  * either inserted in the AIL or aborted in the event of a log I/O error. In
@@ -120,72 +110,23 @@ xfs_rui_item_unpin(
        xfs_rui_release(ruip);
 }
 
-/*
- * RUI items have no locking or pushing.  However, since RUIs are pulled from
- * the AIL when their corresponding RUDs are committed to disk, their situation
- * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log.  This should help in getting the RUI out of
- * the AIL.
- */
-STATIC uint
-xfs_rui_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
-{
-       return XFS_ITEM_PINNED;
-}
-
 /*
  * The RUI has been either committed or aborted if the transaction has been
  * cancelled. If the transaction was cancelled, an RUD isn't going to be
  * constructed and thus we free the RUI here directly.
  */
 STATIC void
-xfs_rui_item_unlock(
+xfs_rui_item_release(
        struct xfs_log_item     *lip)
 {
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
-               xfs_rui_release(RUI_ITEM(lip));
+       xfs_rui_release(RUI_ITEM(lip));
 }
 
-/*
- * The RUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_rui_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
-}
-
-/*
- * The RUI dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_rui_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all rui log items.
- */
 static const struct xfs_item_ops xfs_rui_item_ops = {
        .iop_size       = xfs_rui_item_size,
        .iop_format     = xfs_rui_item_format,
-       .iop_pin        = xfs_rui_item_pin,
        .iop_unpin      = xfs_rui_item_unpin,
-       .iop_unlock     = xfs_rui_item_unlock,
-       .iop_committed  = xfs_rui_item_committed,
-       .iop_push       = xfs_rui_item_push,
-       .iop_committing = xfs_rui_item_committing,
+       .iop_release    = xfs_rui_item_release,
 };
 
 /*
@@ -275,126 +216,271 @@ xfs_rud_item_format(
 }
 
 /*
- * Pinning has no meaning for an rud item, so just return.
+ * The RUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the RUI and free the
+ * RUD.
  */
 STATIC void
-xfs_rud_item_pin(
+xfs_rud_item_release(
        struct xfs_log_item     *lip)
 {
+       struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+
+       xfs_rui_release(rudp->rud_ruip);
+       kmem_zone_free(xfs_rud_zone, rudp);
 }
 
-/*
- * Since pinning has no meaning for an rud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_rud_item_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
+static const struct xfs_item_ops xfs_rud_item_ops = {
+       .flags          = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+       .iop_size       = xfs_rud_item_size,
+       .iop_format     = xfs_rud_item_format,
+       .iop_release    = xfs_rud_item_release,
+};
+
+static struct xfs_rud_log_item *
+xfs_trans_get_rud(
+       struct xfs_trans                *tp,
+       struct xfs_rui_log_item         *ruip)
 {
+       struct xfs_rud_log_item         *rudp;
+
+       rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
+       xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD,
+                         &xfs_rud_item_ops);
+       rudp->rud_ruip = ruip;
+       rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
+
+       xfs_trans_add_item(tp, &rudp->rud_item);
+       return rudp;
 }
 
-/*
- * There isn't much you can do to push on an rud item.  It is simply stuck
- * waiting for the log to be flushed to disk.
- */
-STATIC uint
-xfs_rud_item_push(
-       struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
+/* Set the map extent flags for this reverse mapping. */
+static void
+xfs_trans_set_rmap_flags(
+       struct xfs_map_extent           *rmap,
+       enum xfs_rmap_intent_type       type,
+       int                             whichfork,
+       xfs_exntst_t                    state)
 {
-       return XFS_ITEM_PINNED;
+       rmap->me_flags = 0;
+       if (state == XFS_EXT_UNWRITTEN)
+               rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
+       if (whichfork == XFS_ATTR_FORK)
+               rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
+       switch (type) {
+       case XFS_RMAP_MAP:
+               rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
+               break;
+       case XFS_RMAP_MAP_SHARED:
+               rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
+               break;
+       case XFS_RMAP_UNMAP:
+               rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
+               break;
+       case XFS_RMAP_UNMAP_SHARED:
+               rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
+               break;
+       case XFS_RMAP_CONVERT:
+               rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
+               break;
+       case XFS_RMAP_CONVERT_SHARED:
+               rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
+               break;
+       case XFS_RMAP_ALLOC:
+               rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
+               break;
+       case XFS_RMAP_FREE:
+               rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
+               break;
+       default:
+               ASSERT(0);
+       }
 }
 
 /*
- * The RUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the RUI and free the
- * RUD.
+ * Finish an rmap update and log it to the RUD. Note that the transaction is
+ * marked dirty regardless of whether the rmap update succeeds or fails to
+ * support the RUI/RUD lifecycle rules.
  */
-STATIC void
-xfs_rud_item_unlock(
-       struct xfs_log_item     *lip)
+static int
+xfs_trans_log_finish_rmap_update(
+       struct xfs_trans                *tp,
+       struct xfs_rud_log_item         *rudp,
+       enum xfs_rmap_intent_type       type,
+       uint64_t                        owner,
+       int                             whichfork,
+       xfs_fileoff_t                   startoff,
+       xfs_fsblock_t                   startblock,
+       xfs_filblks_t                   blockcount,
+       xfs_exntst_t                    state,
+       struct xfs_btree_cur            **pcur)
 {
-       struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+       int                             error;
 
-       if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
-               xfs_rui_release(rudp->rud_ruip);
-               kmem_zone_free(xfs_rud_zone, rudp);
-       }
+       error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
+                       startblock, blockcount, state, pcur);
+
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the RUI and frees the RUD
+        * 2.) shuts down the filesystem
+        */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
+
+       return error;
 }
 
-/*
- * When the rud item is committed to disk, all we need to do is delete our
- * reference to our partner rui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_rud_item_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+/* Sort rmap intents by AG. */
+static int
+xfs_rmap_update_diff_items(
+       void                            *priv,
+       struct list_head                *a,
+       struct list_head                *b)
 {
-       struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+       struct xfs_mount                *mp = priv;
+       struct xfs_rmap_intent          *ra;
+       struct xfs_rmap_intent          *rb;
+
+       ra = container_of(a, struct xfs_rmap_intent, ri_list);
+       rb = container_of(b, struct xfs_rmap_intent, ri_list);
+       return  XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
+               XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
+}
+
+/* Get an RUI. */
+STATIC void *
+xfs_rmap_update_create_intent(
+       struct xfs_trans                *tp,
+       unsigned int                    count)
+{
+       struct xfs_rui_log_item         *ruip;
+
+       ASSERT(tp != NULL);
+       ASSERT(count > 0);
+
+       ruip = xfs_rui_init(tp->t_mountp, count);
+       ASSERT(ruip != NULL);
 
        /*
-        * Drop the RUI reference regardless of whether the RUD has been
-        * aborted. Once the RUD transaction is constructed, it is the sole
-        * responsibility of the RUD to release the RUI (even if the RUI is
-        * aborted due to log I/O error).
+        * Get a log_item_desc to point at the new item.
         */
-       xfs_rui_release(rudp->rud_ruip);
-       kmem_zone_free(xfs_rud_zone, rudp);
-
-       return (xfs_lsn_t)-1;
+       xfs_trans_add_item(tp, &ruip->rui_item);
+       return ruip;
 }
 
-/*
- * The RUD dependency tracking op doesn't do squat.  It can't because
- * it doesn't know where the free extent is coming from.  The dependency
- * tracking has to be handled by the "enclosing" metadata object.  For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Log rmap updates in the intent item. */
 STATIC void
-xfs_rud_item_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
+xfs_rmap_update_log_item(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       struct list_head                *item)
 {
+       struct xfs_rui_log_item         *ruip = intent;
+       struct xfs_rmap_intent          *rmap;
+       uint                            next_extent;
+       struct xfs_map_extent           *map;
+
+       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
+
+       /*
+        * atomic_inc_return gives us the value after the increment;
+        * we want to use it as an array index so we need to subtract 1 from
+        * it.
+        */
+       next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
+       ASSERT(next_extent < ruip->rui_format.rui_nextents);
+       map = &ruip->rui_format.rui_extents[next_extent];
+       map->me_owner = rmap->ri_owner;
+       map->me_startblock = rmap->ri_bmap.br_startblock;
+       map->me_startoff = rmap->ri_bmap.br_startoff;
+       map->me_len = rmap->ri_bmap.br_blockcount;
+       xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
+                       rmap->ri_bmap.br_state);
 }
 
-/*
- * This is the ops vector shared by all rud log items.
- */
-static const struct xfs_item_ops xfs_rud_item_ops = {
-       .iop_size       = xfs_rud_item_size,
-       .iop_format     = xfs_rud_item_format,
-       .iop_pin        = xfs_rud_item_pin,
-       .iop_unpin      = xfs_rud_item_unpin,
-       .iop_unlock     = xfs_rud_item_unlock,
-       .iop_committed  = xfs_rud_item_committed,
-       .iop_push       = xfs_rud_item_push,
-       .iop_committing = xfs_rud_item_committing,
-};
+/* Get an RUD so we can process all the deferred rmap updates. */
+STATIC void *
+xfs_rmap_update_create_done(
+       struct xfs_trans                *tp,
+       void                            *intent,
+       unsigned int                    count)
+{
+       return xfs_trans_get_rud(tp, intent);
+}
 
-/*
- * Allocate and initialize an rud item with the given number of extents.
- */
-struct xfs_rud_log_item *
-xfs_rud_init(
-       struct xfs_mount                *mp,
-       struct xfs_rui_log_item         *ruip)
+/* Process a deferred rmap update. */
+STATIC int
+xfs_rmap_update_finish_item(
+       struct xfs_trans                *tp,
+       struct list_head                *item,
+       void                            *done_item,
+       void                            **state)
+{
+       struct xfs_rmap_intent          *rmap;
+       int                             error;
+
+       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+       error = xfs_trans_log_finish_rmap_update(tp, done_item,
+                       rmap->ri_type,
+                       rmap->ri_owner, rmap->ri_whichfork,
+                       rmap->ri_bmap.br_startoff,
+                       rmap->ri_bmap.br_startblock,
+                       rmap->ri_bmap.br_blockcount,
+                       rmap->ri_bmap.br_state,
+                       (struct xfs_btree_cur **)state);
+       kmem_free(rmap);
+       return error;
+}
+
+/* Clean up after processing deferred rmaps. */
+STATIC void
+xfs_rmap_update_finish_cleanup(
+       struct xfs_trans        *tp,
+       void                    *state,
+       int                     error)
+{
+       struct xfs_btree_cur    *rcur = state;
+
+       xfs_rmap_finish_one_cleanup(tp, rcur, error);
+}
 
+/* Abort a pending RUI by releasing it. */
+STATIC void
+xfs_rmap_update_abort_intent(
+       void                            *intent)
 {
-       struct xfs_rud_log_item *rudp;
+       xfs_rui_release(intent);
+}
 
-       rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
-       xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops);
-       rudp->rud_ruip = ruip;
-       rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
+/* Cancel a deferred rmap update. */
+STATIC void
+xfs_rmap_update_cancel_item(
+       struct list_head                *item)
+{
+       struct xfs_rmap_intent          *rmap;
 
-       return rudp;
+       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+       kmem_free(rmap);
 }
 
+const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
+       .max_items      = XFS_RUI_MAX_FAST_EXTENTS,
+       .diff_items     = xfs_rmap_update_diff_items,
+       .create_intent  = xfs_rmap_update_create_intent,
+       .abort_intent   = xfs_rmap_update_abort_intent,
+       .log_item       = xfs_rmap_update_log_item,
+       .create_done    = xfs_rmap_update_create_done,
+       .finish_item    = xfs_rmap_update_finish_item,
+       .finish_cleanup = xfs_rmap_update_finish_cleanup,
+       .cancel_item    = xfs_rmap_update_cancel_item,
+};
+
 /*
  * Process an rmap update intent item that was recovered from the log.
  * We need to update the rmapbt.
index 7e482baa27f5b5e3a2bf5e89b80933f04dac3b74..8708e4a5aa5c37a29ea994d5220de49edf38c13e 100644 (file)
@@ -78,8 +78,6 @@ extern struct kmem_zone       *xfs_rui_zone;
 extern struct kmem_zone        *xfs_rud_zone;
 
 struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
-struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *,
-               struct xfs_rui_log_item *);
 int xfs_rui_copy_format(struct xfs_log_iovec *buf,
                struct xfs_rui_log_format *dst_rui_fmt);
 void xfs_rui_item_free(struct xfs_rui_log_item *);
index ac0fcdad0c4edee1f3085aef91641f2f0a2030ca..5fa4db3c3e320b39277ad576946a1cc934b4ea6e 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
 #include "xfs_trans.h"
 #include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_buf.h"
 #include "xfs_icache.h"
 #include "xfs_rtalloc.h"
 
index cc509743facd8ddfedc6c8946446f515a79dfb69..113883c4f202e09e2e435f6883d7f6b484a05fe0 100644 (file)
@@ -4,7 +4,6 @@
  * All Rights Reserved.
  */
 #include "xfs.h"
-#include <linux/proc_fs.h>
 
 struct xstats xfsstats;
 
index a14d11d78bd80cfcd1b34beda1d63f931b336723..f9450235533cc4fcbeab7c3f5aba5f6a38cf1e99 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_bmap.h"
 #include "xfs_alloc.h"
-#include "xfs_error.h"
 #include "xfs_fsops.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_log.h"
 #include "xfs_log_priv.h"
-#include "xfs_da_btree.h"
 #include "xfs_dir2.h"
 #include "xfs_extfree_item.h"
 #include "xfs_mru_cache.h"
 #include "xfs_refcount_item.h"
 #include "xfs_bmap_item.h"
 #include "xfs_reflink.h"
-#include "xfs_defer.h"
 
-#include <linux/namei.h>
-#include <linux/dax.h>
-#include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/magic.h>
-#include <linux/mount.h>
-#include <linux/mempool.h>
-#include <linux/writeback.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
 #include <linux/parser.h>
 
 static const struct super_operations xfs_super_operations;
@@ -582,7 +569,7 @@ xfs_set_inode_alloc(
         * Calculate how much should be reserved for inodes to meet
         * the max inode percentage.  Used only for inode32.
         */
-       if (mp->m_maxicount) {
+       if (M_IGEO(mp)->maxicount) {
                uint64_t        icount;
 
                icount = sbp->sb_dblocks * sbp->sb_imax_pct;
@@ -840,16 +827,10 @@ xfs_init_mount_workqueues(
        if (!mp->m_reclaim_workqueue)
                goto out_destroy_cil;
 
-       mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
-                       WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
-                       mp->m_fsname);
-       if (!mp->m_log_workqueue)
-               goto out_destroy_reclaim;
-
        mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
                        WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
        if (!mp->m_eofblocks_workqueue)
-               goto out_destroy_log;
+               goto out_destroy_reclaim;
 
        mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
                                               mp->m_fsname);
@@ -860,8 +841,6 @@ xfs_init_mount_workqueues(
 
 out_destroy_eofb:
        destroy_workqueue(mp->m_eofblocks_workqueue);
-out_destroy_log:
-       destroy_workqueue(mp->m_log_workqueue);
 out_destroy_reclaim:
        destroy_workqueue(mp->m_reclaim_workqueue);
 out_destroy_cil:
@@ -880,7 +859,6 @@ xfs_destroy_mount_workqueues(
 {
        destroy_workqueue(mp->m_sync_workqueue);
        destroy_workqueue(mp->m_eofblocks_workqueue);
-       destroy_workqueue(mp->m_log_workqueue);
        destroy_workqueue(mp->m_reclaim_workqueue);
        destroy_workqueue(mp->m_cil_workqueue);
        destroy_workqueue(mp->m_unwritten_workqueue);
@@ -1131,10 +1109,10 @@ xfs_fs_statfs(
 
        fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
        statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
-       if (mp->m_maxicount)
+       if (M_IGEO(mp)->maxicount)
                statp->f_files = min_t(typeof(statp->f_files),
                                        statp->f_files,
-                                       mp->m_maxicount);
+                                       M_IGEO(mp)->maxicount);
 
        /* If sb_icount overshot maxicount, report actual allocation */
        statp->f_files = max_t(typeof(statp->f_files),
@@ -1685,6 +1663,8 @@ xfs_fs_fill_super(
        sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
        sb->s_max_links = XFS_MAXLINK;
        sb->s_time_gran = 1;
+       sb->s_iflags |= SB_I_CGROUPWB;
+
        set_posix_acl_flag(sb);
 
        /* version 5 superblocks support inode version counters. */
index 21cb49a43d7cb0c8c9945fc35cd71dc0e3fceb6d..763e43d22deeffc20862742ccdb4c13cbdd0e796 100644 (file)
@@ -38,6 +38,18 @@ extern void xfs_qm_exit(void);
 # define XFS_SCRUB_STRING
 #endif
 
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+# define XFS_REPAIR_STRING     "repair, "
+#else
+# define XFS_REPAIR_STRING
+#endif
+
+#ifdef CONFIG_XFS_WARN
+# define XFS_WARN_STRING       "verbose warnings, "
+#else
+# define XFS_WARN_STRING
+#endif
+
 #ifdef DEBUG
 # define XFS_DBG_STRING                "debug"
 #else
@@ -49,6 +61,8 @@ extern void xfs_qm_exit(void);
                                XFS_SECURITY_STRING \
                                XFS_REALTIME_STRING \
                                XFS_SCRUB_STRING \
+                               XFS_REPAIR_STRING \
+                               XFS_WARN_STRING \
                                XFS_DBG_STRING /* DBG must be last */
 
 struct xfs_inode;
index b2c1177c717ff4f0083a3788f02c3e6d3c745374..ed66fd2de3273355fa5e74d6e953e72c0d827bdf 100644 (file)
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
 #include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_defer.h"
 #include "xfs_dir2.h"
 #include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_alloc.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_bmap_util.h"
-#include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
 #include "xfs_trace.h"
-#include "xfs_symlink.h"
 #include "xfs_trans.h"
-#include "xfs_log.h"
 
 /* ----- Kernel only functions below ----- */
 int
index 0cc034dfb78608e23b5a6fea5f41b076c8743b12..31b3bdbd2ebad14ed90aae053dbc40589c0f69d6 100644 (file)
@@ -4,10 +4,7 @@
  * All Rights Reserved.
  */
 #include "xfs.h"
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
 #include "xfs_error.h"
-#include "xfs_stats.h"
 
 static struct ctl_table_header *xfs_table_header;
 
index ad7f9be130872c9e0664780115ba496bfd49e885..8abf4640f1d552af4cecc866c761b597d08d4fb7 100644 (file)
@@ -82,6 +82,9 @@ enum {
 extern xfs_param_t     xfs_params;
 
 struct xfs_globals {
+#ifdef DEBUG
+       int     pwork_threads;          /* parallel workqueue threads */
+#endif
        int     log_recovery_delay;     /* log recovery delay (secs) */
        int     mount_delay;            /* mount setup delay (secs) */
        bool    bug_on_assert;          /* BUG() the kernel on assert failure */
index cabda13f3c64168a7a33d01e37bf895f9e4a07a4..ddd0bf7a474026059a62710b42b7cf90e5757d1e 100644 (file)
@@ -10,9 +10,7 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_sysfs.h"
-#include "xfs_log.h"
 #include "xfs_log_priv.h"
-#include "xfs_stats.h"
 #include "xfs_mount.h"
 
 struct xfs_sysfs_attr {
@@ -206,11 +204,51 @@ always_cow_show(
 }
 XFS_SYSFS_ATTR_RW(always_cow);
 
+#ifdef DEBUG
+/*
+ * Debug-only override of the parallel work queue thread count; a global, not
+ * an errortag, because a main pwork user is mount time quotacheck.  Accepts
+ * -1..num_possible_cpus() (else -EINVAL); returns @count once stored.
+ */
+STATIC ssize_t
+pwork_threads_store(
+       struct kobject  *kobject,
+       const char      *buf,
+       size_t          count)
+{
+       int             ret;
+       int             val;
+
+       ret = kstrtoint(buf, 0, &val);
+       if (ret)
+               return ret;
+       /* Compare as int; against an unsigned bound -1 becomes UINT_MAX. */
+       if (val < -1 || val > (int)num_possible_cpus())
+               return -EINVAL;
+
+       xfs_globals.pwork_threads = val;
+
+       return count;
+}
+
+STATIC ssize_t
+pwork_threads_show(
+       struct kobject  *kobject,
+       char            *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads);
+}
+XFS_SYSFS_ATTR_RW(pwork_threads);
+#endif /* DEBUG */
+
 static struct attribute *xfs_dbg_attrs[] = {
        ATTR_LIST(bug_on_assert),
        ATTR_LIST(log_recovery_delay),
        ATTR_LIST(mount_delay),
        ATTR_LIST(always_cow),
+#ifdef DEBUG
+       ATTR_LIST(pwork_threads),
+#endif
        NULL,
 };
 
index cb6489c22cad2015126b6e7b1977c1507b017ad5..bc85b89f88cae1cf58a4353cc050f179fa6a43f3 100644 (file)
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_da_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
 #include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
 #include "xfs_trans.h"
-#include "xfs_log.h"
 #include "xfs_log_priv.h"
 #include "xfs_buf_item.h"
 #include "xfs_quota.h"
-#include "xfs_iomap.h"
-#include "xfs_aops.h"
 #include "xfs_dquot_item.h"
 #include "xfs_dquot.h"
 #include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_filestream.h"
 #include "xfs_fsmap.h"
 
index 2464ea351f837bd185da6ca7b2e146e30c9bae2d..8094b1920eeff6413648c2af6251c4de1dcb4a0b 100644 (file)
@@ -475,7 +475,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
@@ -3360,6 +3360,7 @@ DEFINE_TRANS_EVENT(xfs_trans_dup);
 DEFINE_TRANS_EVENT(xfs_trans_free);
 DEFINE_TRANS_EVENT(xfs_trans_roll);
 DEFINE_TRANS_EVENT(xfs_trans_add_item);
+DEFINE_TRANS_EVENT(xfs_trans_commit_items);
 DEFINE_TRANS_EVENT(xfs_trans_free_items);
 
 TRACE_EVENT(xfs_iunlink_update_bucket,
@@ -3516,6 +3517,64 @@ DEFINE_EVENT(xfs_inode_corrupt_class, name,      \
 DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick);
 DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy);
 
+TRACE_EVENT(xfs_iwalk_ag,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agino_t startino),
+       TP_ARGS(mp, agno, startino),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agino_t, startino)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->startino = startino;
+       ),
+       TP_printk("dev %d:%d agno %d startino %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+                 __entry->startino)
+)
+
+TRACE_EVENT(xfs_iwalk_ag_rec,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                struct xfs_inobt_rec_incore *irec),
+       TP_ARGS(mp, agno, irec),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agino_t, startino)
+               __field(uint64_t, freemask)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->startino = irec->ir_startino;
+               __entry->freemask = irec->ir_free;
+       ),
+       TP_printk("dev %d:%d agno %d startino %u freemask 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+                 __entry->startino, __entry->freemask)
+)
+
+TRACE_EVENT(xfs_pwork_init,
+       TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid),
+       TP_ARGS(mp, nr_threads, pid),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned int, nr_threads)
+               __field(pid_t, pid)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->nr_threads = nr_threads;
+               __entry->pid = pid;
+       ),
+       TP_printk("dev %d:%d nr_threads %u pid %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->nr_threads, __entry->pid)
+)
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH
index 912b42f5fe4ac61ed79bbc729f5ccf094de66c32..d42a68d8313bdd5721f317a6c64858a67b13d6e6 100644 (file)
@@ -11,7 +11,6 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_inode.h"
 #include "xfs_extent_busy.h"
 #include "xfs_quota.h"
 #include "xfs_trans.h"
@@ -264,9 +263,7 @@ xfs_trans_alloc(
         * GFP_NOFS allocation context so that we avoid lockdep false positives
         * by doing GFP_KERNEL allocations inside sb_start_intwrite().
         */
-       tp = kmem_zone_zalloc(xfs_trans_zone,
-               (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);
-
+       tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
        if (!(flags & XFS_TRANS_NO_WRITECOUNT))
                sb_start_intwrite(mp->m_super);
 
@@ -452,7 +449,7 @@ xfs_trans_apply_sb_deltas(
        xfs_buf_t       *bp;
        int             whole = 0;
 
-       bp = xfs_trans_getsb(tp, tp->t_mountp, 0);
+       bp = xfs_trans_getsb(tp, tp->t_mountp);
        sbp = XFS_BUF_TO_SBP(bp);
 
        /*
@@ -767,10 +764,9 @@ xfs_trans_del_item(
 }
 
 /* Detach and unlock all of the items in a transaction */
-void
+static void
 xfs_trans_free_items(
        struct xfs_trans        *tp,
-       xfs_lsn_t               commit_lsn,
        bool                    abort)
 {
        struct xfs_log_item     *lip, *next;
@@ -779,11 +775,10 @@ xfs_trans_free_items(
 
        list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
                xfs_trans_del_item(lip);
-               if (commit_lsn != NULLCOMMITLSN)
-                       lip->li_ops->iop_committing(lip, commit_lsn);
                if (abort)
                        set_bit(XFS_LI_ABORTED, &lip->li_flags);
-               lip->li_ops->iop_unlock(lip);
+               if (lip->li_ops->iop_release)
+                       lip->li_ops->iop_release(lip);
        }
 }
 
@@ -804,7 +799,8 @@ xfs_log_item_batch_insert(
        for (i = 0; i < nr_items; i++) {
                struct xfs_log_item *lip = log_items[i];
 
-               lip->li_ops->iop_unpin(lip, 0);
+               if (lip->li_ops->iop_unpin)
+                       lip->li_ops->iop_unpin(lip, 0);
        }
 }
 
@@ -815,7 +811,7 @@ xfs_log_item_batch_insert(
  *
  * If we are called with the aborted flag set, it is because a log write during
  * a CIL checkpoint commit has failed. In this case, all the items in the
- * checkpoint have already gone through iop_commited and iop_unlock, which
+ * checkpoint have already gone through iop_committed and iop_committing, which
  * means that checkpoint commit abort handling is treated exactly the same
  * as an iclog write error even though we haven't started any IO yet. Hence in
  * this case all we need to do is iop_committed processing, followed by an
@@ -833,7 +829,7 @@ xfs_trans_committed_bulk(
        struct xfs_ail          *ailp,
        struct xfs_log_vec      *log_vector,
        xfs_lsn_t               commit_lsn,
-       int                     aborted)
+       bool                    aborted)
 {
 #define LOG_ITEM_BATCH_SIZE    32
        struct xfs_log_item     *log_items[LOG_ITEM_BATCH_SIZE];
@@ -852,7 +848,16 @@ xfs_trans_committed_bulk(
 
                if (aborted)
                        set_bit(XFS_LI_ABORTED, &lip->li_flags);
-               item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
+
+               if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
+                       lip->li_ops->iop_release(lip);
+                       continue;
+               }
+
+               if (lip->li_ops->iop_committed)
+                       item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
+               else
+                       item_lsn = commit_lsn;
 
                /* item_lsn of -1 means the item needs no further processing */
                if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
@@ -864,7 +869,8 @@ xfs_trans_committed_bulk(
                 */
                if (aborted) {
                        ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount));
-                       lip->li_ops->iop_unpin(lip, 1);
+                       if (lip->li_ops->iop_unpin)
+                               lip->li_ops->iop_unpin(lip, 1);
                        continue;
                }
 
@@ -882,7 +888,8 @@ xfs_trans_committed_bulk(
                                xfs_trans_ail_update(ailp, lip, item_lsn);
                        else
                                spin_unlock(&ailp->ail_lock);
-                       lip->li_ops->iop_unpin(lip, 0);
+                       if (lip->li_ops->iop_unpin)
+                               lip->li_ops->iop_unpin(lip, 0);
                        continue;
                }
 
@@ -998,7 +1005,7 @@ out_unreserve:
                tp->t_ticket = NULL;
        }
        current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
-       xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
+       xfs_trans_free_items(tp, !!error);
        xfs_trans_free(tp);
 
        XFS_STATS_INC(mp, xs_trans_empty);
@@ -1060,7 +1067,7 @@ xfs_trans_cancel(
        /* mark this thread as no longer being in a transaction */
        current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
 
-       xfs_trans_free_items(tp, NULLCOMMITLSN, dirty);
+       xfs_trans_free_items(tp, dirty);
        xfs_trans_free(tp);
 }
 
index c6e1c5704a8c2b0f94fcc56b7f4f8a34057bf752..64d7f171ebd32ae8ebe4992b64700d36e49f18f1 100644 (file)
@@ -27,7 +27,7 @@ struct xfs_cud_log_item;
 struct xfs_bui_log_item;
 struct xfs_bud_log_item;
 
-typedef struct xfs_log_item {
+struct xfs_log_item {
        struct list_head                li_ail;         /* AIL pointers */
        struct list_head                li_trans;       /* transaction list */
        xfs_lsn_t                       li_lsn;         /* last on-disk lsn */
@@ -48,7 +48,7 @@ typedef struct xfs_log_item {
        struct xfs_log_vec              *li_lv;         /* active log vector */
        struct xfs_log_vec              *li_lv_shadow;  /* standby vector */
        xfs_lsn_t                       li_seq;         /* CIL commit seq */
-} xfs_log_item_t;
+};
 
 /*
  * li_flags use the (set/test/clear)_bit atomic interfaces because updates can
@@ -67,17 +67,24 @@ typedef struct xfs_log_item {
        { (1 << XFS_LI_DIRTY),          "DIRTY" }
 
 struct xfs_item_ops {
-       void (*iop_size)(xfs_log_item_t *, int *, int *);
-       void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *);
-       void (*iop_pin)(xfs_log_item_t *);
-       void (*iop_unpin)(xfs_log_item_t *, int remove);
+       unsigned flags;
+       void (*iop_size)(struct xfs_log_item *, int *, int *);
+       void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *);
+       void (*iop_pin)(struct xfs_log_item *);
+       void (*iop_unpin)(struct xfs_log_item *, int remove);
        uint (*iop_push)(struct xfs_log_item *, struct list_head *);
-       void (*iop_unlock)(xfs_log_item_t *);
-       xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
-       void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
-       void (*iop_error)(xfs_log_item_t *, xfs_buf_t *);
+       void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
+       void (*iop_release)(struct xfs_log_item *);
+       xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
+       void (*iop_error)(struct xfs_log_item *, xfs_buf_t *);
 };
 
+/*
+ * Release the log item as soon as committed.  This is for items just logging
+ * intents that never need to be written back in place.
+ */
+#define XFS_ITEM_RELEASE_WHEN_COMMITTED        (1 << 0)
+
 void   xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
                          int type, const struct xfs_item_ops *ops);
 
@@ -203,7 +210,7 @@ xfs_trans_read_buf(
                                      flags, bpp, ops);
 }
 
-struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
+struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *);
 
 void           xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
 void           xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
@@ -223,14 +230,6 @@ void               xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *);
 bool           xfs_trans_buf_is_dirty(struct xfs_buf *bp);
 void           xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
 
-struct xfs_efd_log_item        *xfs_trans_get_efd(struct xfs_trans *,
-                                 struct xfs_efi_log_item *,
-                                 uint);
-int            xfs_trans_free_extent(struct xfs_trans *,
-                                     struct xfs_efd_log_item *, xfs_fsblock_t,
-                                     xfs_extlen_t,
-                                     const struct xfs_owner_info *,
-                                     bool);
 int            xfs_trans_commit(struct xfs_trans *);
 int            xfs_trans_roll(struct xfs_trans **);
 int            xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *);
@@ -245,37 +244,4 @@ void               xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
 
 extern kmem_zone_t     *xfs_trans_zone;
 
-/* rmap updates */
-enum xfs_rmap_intent_type;
-
-struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
-               struct xfs_rui_log_item *ruip);
-int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
-               struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
-               uint64_t owner, int whichfork, xfs_fileoff_t startoff,
-               xfs_fsblock_t startblock, xfs_filblks_t blockcount,
-               xfs_exntst_t state, struct xfs_btree_cur **pcur);
-
-/* refcount updates */
-enum xfs_refcount_intent_type;
-
-struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp,
-               struct xfs_cui_log_item *cuip);
-int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp,
-               struct xfs_cud_log_item *cudp,
-               enum xfs_refcount_intent_type type, xfs_fsblock_t startblock,
-               xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb,
-               xfs_extlen_t *new_len, struct xfs_btree_cur **pcur);
-
-/* mapping updates */
-enum xfs_bmap_intent_type;
-
-struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp,
-               struct xfs_bui_log_item *buip);
-int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp,
-               struct xfs_bud_log_item *rudp, enum xfs_bmap_intent_type type,
-               struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff,
-               xfs_fsblock_t startblock, xfs_filblks_t *blockcount,
-               xfs_exntst_t state);
-
 #endif /* __XFS_TRANS_H__ */
index d3a4e89bf4a0ddb916ed4f5d395285e2e2188869..6ccfd75d3c24ce7207e336b5c3b714c18bdcb69a 100644 (file)
@@ -6,6 +6,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
@@ -74,29 +75,29 @@ xfs_ail_check(
  * Return a pointer to the last item in the AIL.  If the AIL is empty, then
  * return NULL.
  */
-static xfs_log_item_t *
+static struct xfs_log_item *
 xfs_ail_max(
        struct xfs_ail  *ailp)
 {
        if (list_empty(&ailp->ail_head))
                return NULL;
 
-       return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail);
+       return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail);
 }
 
 /*
  * Return a pointer to the item which follows the given item in the AIL.  If
  * the given item is the last item in the list, then return NULL.
  */
-static xfs_log_item_t *
+static struct xfs_log_item *
 xfs_ail_next(
-       struct xfs_ail  *ailp,
-       xfs_log_item_t  *lip)
+       struct xfs_ail          *ailp,
+       struct xfs_log_item     *lip)
 {
        if (lip->li_ail.next == &ailp->ail_head)
                return NULL;
 
-       return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
+       return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail);
 }
 
 /*
@@ -109,10 +110,10 @@ xfs_ail_next(
  */
 xfs_lsn_t
 xfs_ail_min_lsn(
-       struct xfs_ail  *ailp)
+       struct xfs_ail          *ailp)
 {
-       xfs_lsn_t       lsn = 0;
-       xfs_log_item_t  *lip;
+       xfs_lsn_t               lsn = 0;
+       struct xfs_log_item     *lip;
 
        spin_lock(&ailp->ail_lock);
        lip = xfs_ail_min(ailp);
@@ -128,10 +129,10 @@ xfs_ail_min_lsn(
  */
 static xfs_lsn_t
 xfs_ail_max_lsn(
-       struct xfs_ail  *ailp)
+       struct xfs_ail          *ailp)
 {
-       xfs_lsn_t       lsn = 0;
-       xfs_log_item_t  *lip;
+       xfs_lsn_t               lsn = 0;
+       struct xfs_log_item     *lip;
 
        spin_lock(&ailp->ail_lock);
        lip = xfs_ail_max(ailp);
@@ -216,13 +217,13 @@ xfs_trans_ail_cursor_clear(
  * ascending traversal.  Pass a @lsn of zero to initialise the cursor to the
  * first item in the AIL. Returns NULL if the list is empty.
  */
-xfs_log_item_t *
+struct xfs_log_item *
 xfs_trans_ail_cursor_first(
        struct xfs_ail          *ailp,
        struct xfs_ail_cursor   *cur,
        xfs_lsn_t               lsn)
 {
-       xfs_log_item_t          *lip;
+       struct xfs_log_item     *lip;
 
        xfs_trans_ail_cursor_init(ailp, cur);
 
@@ -248,7 +249,7 @@ __xfs_trans_ail_cursor_last(
        struct xfs_ail          *ailp,
        xfs_lsn_t               lsn)
 {
-       xfs_log_item_t          *lip;
+       struct xfs_log_item     *lip;
 
        list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) {
                if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
@@ -327,8 +328,8 @@ xfs_ail_splice(
  */
 static void
 xfs_ail_delete(
-       struct xfs_ail  *ailp,
-       xfs_log_item_t  *lip)
+       struct xfs_ail          *ailp,
+       struct xfs_log_item     *lip)
 {
        xfs_ail_check(ailp, lip);
        list_del(&lip->li_ail);
@@ -347,6 +348,14 @@ xfsaild_push_item(
        if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
                return XFS_ITEM_PINNED;
 
+       /*
+        * Consider the item pinned if a push callback is not defined so the
+        * caller will force the log. This should only happen for intent items
+        * as they are unpinned once the associated done item is committed to
+        * the on-disk log.
+        */
+       if (!lip->li_ops->iop_push)
+               return XFS_ITEM_PINNED;
        return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
 }
 
@@ -356,7 +365,7 @@ xfsaild_push(
 {
        xfs_mount_t             *mp = ailp->ail_mount;
        struct xfs_ail_cursor   cur;
-       xfs_log_item_t          *lip;
+       struct xfs_log_item     *lip;
        xfs_lsn_t               lsn;
        xfs_lsn_t               target;
        long                    tout;
@@ -611,10 +620,10 @@ xfsaild(
  */
 void
 xfs_ail_push(
-       struct xfs_ail  *ailp,
-       xfs_lsn_t       threshold_lsn)
+       struct xfs_ail          *ailp,
+       xfs_lsn_t               threshold_lsn)
 {
-       xfs_log_item_t  *lip;
+       struct xfs_log_item     *lip;
 
        lip = xfs_ail_min(ailp);
        if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
@@ -699,7 +708,7 @@ xfs_trans_ail_update_bulk(
        int                     nr_items,
        xfs_lsn_t               lsn) __releases(ailp->ail_lock)
 {
-       xfs_log_item_t          *mlip;
+       struct xfs_log_item     *mlip;
        int                     mlip_changed = 0;
        int                     i;
        LIST_HEAD(tmp);
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c
deleted file mode 100644 (file)
index e1c7d55..0000000
+++ /dev/null
@@ -1,232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle.  All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_bmap_item.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_inode.h"
-
-/*
- * This routine is called to allocate a "bmap update done"
- * log item.
- */
-struct xfs_bud_log_item *
-xfs_trans_get_bud(
-       struct xfs_trans                *tp,
-       struct xfs_bui_log_item         *buip)
-{
-       struct xfs_bud_log_item         *budp;
-
-       budp = xfs_bud_init(tp->t_mountp, buip);
-       xfs_trans_add_item(tp, &budp->bud_item);
-       return budp;
-}
-
-/*
- * Finish an bmap update and log it to the BUD. Note that the
- * transaction is marked dirty regardless of whether the bmap update
- * succeeds or fails to support the BUI/BUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_bmap_update(
-       struct xfs_trans                *tp,
-       struct xfs_bud_log_item         *budp,
-       enum xfs_bmap_intent_type       type,
-       struct xfs_inode                *ip,
-       int                             whichfork,
-       xfs_fileoff_t                   startoff,
-       xfs_fsblock_t                   startblock,
-       xfs_filblks_t                   *blockcount,
-       xfs_exntst_t                    state)
-{
-       int                             error;
-
-       error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
-                       startblock, blockcount, state);
-
-       /*
-        * Mark the transaction dirty, even on error. This ensures the
-        * transaction is aborted, which:
-        *
-        * 1.) releases the BUI and frees the BUD
-        * 2.) shuts down the filesystem
-        */
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
-
-       return error;
-}
-
-/* Sort bmap intents by inode. */
-static int
-xfs_bmap_update_diff_items(
-       void                            *priv,
-       struct list_head                *a,
-       struct list_head                *b)
-{
-       struct xfs_bmap_intent          *ba;
-       struct xfs_bmap_intent          *bb;
-
-       ba = container_of(a, struct xfs_bmap_intent, bi_list);
-       bb = container_of(b, struct xfs_bmap_intent, bi_list);
-       return ba->bi_owner->i_ino - bb->bi_owner->i_ino;
-}
-
-/* Get an BUI. */
-STATIC void *
-xfs_bmap_update_create_intent(
-       struct xfs_trans                *tp,
-       unsigned int                    count)
-{
-       struct xfs_bui_log_item         *buip;
-
-       ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
-       ASSERT(tp != NULL);
-
-       buip = xfs_bui_init(tp->t_mountp);
-       ASSERT(buip != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &buip->bui_item);
-       return buip;
-}
-
-/* Set the map extent flags for this mapping. */
-static void
-xfs_trans_set_bmap_flags(
-       struct xfs_map_extent           *bmap,
-       enum xfs_bmap_intent_type       type,
-       int                             whichfork,
-       xfs_exntst_t                    state)
-{
-       bmap->me_flags = 0;
-       switch (type) {
-       case XFS_BMAP_MAP:
-       case XFS_BMAP_UNMAP:
-               bmap->me_flags = type;
-               break;
-       default:
-               ASSERT(0);
-       }
-       if (state == XFS_EXT_UNWRITTEN)
-               bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN;
-       if (whichfork == XFS_ATTR_FORK)
-               bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK;
-}
-
-/* Log bmap updates in the intent item. */
-STATIC void
-xfs_bmap_update_log_item(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       struct list_head                *item)
-{
-       struct xfs_bui_log_item         *buip = intent;
-       struct xfs_bmap_intent          *bmap;
-       uint                            next_extent;
-       struct xfs_map_extent           *map;
-
-       bmap = container_of(item, struct xfs_bmap_intent, bi_list);
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
-
-       /*
-        * atomic_inc_return gives us the value after the increment;
-        * we want to use it as an array index so we need to subtract 1 from
-        * it.
-        */
-       next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
-       ASSERT(next_extent < buip->bui_format.bui_nextents);
-       map = &buip->bui_format.bui_extents[next_extent];
-       map->me_owner = bmap->bi_owner->i_ino;
-       map->me_startblock = bmap->bi_bmap.br_startblock;
-       map->me_startoff = bmap->bi_bmap.br_startoff;
-       map->me_len = bmap->bi_bmap.br_blockcount;
-       xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork,
-                       bmap->bi_bmap.br_state);
-}
-
-/* Get an BUD so we can process all the deferred rmap updates. */
-STATIC void *
-xfs_bmap_update_create_done(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       unsigned int                    count)
-{
-       return xfs_trans_get_bud(tp, intent);
-}
-
-/* Process a deferred rmap update. */
-STATIC int
-xfs_bmap_update_finish_item(
-       struct xfs_trans                *tp,
-       struct list_head                *item,
-       void                            *done_item,
-       void                            **state)
-{
-       struct xfs_bmap_intent          *bmap;
-       xfs_filblks_t                   count;
-       int                             error;
-
-       bmap = container_of(item, struct xfs_bmap_intent, bi_list);
-       count = bmap->bi_bmap.br_blockcount;
-       error = xfs_trans_log_finish_bmap_update(tp, done_item,
-                       bmap->bi_type,
-                       bmap->bi_owner, bmap->bi_whichfork,
-                       bmap->bi_bmap.br_startoff,
-                       bmap->bi_bmap.br_startblock,
-                       &count,
-                       bmap->bi_bmap.br_state);
-       if (!error && count > 0) {
-               ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
-               bmap->bi_bmap.br_blockcount = count;
-               return -EAGAIN;
-       }
-       kmem_free(bmap);
-       return error;
-}
-
-/* Abort all pending BUIs. */
-STATIC void
-xfs_bmap_update_abort_intent(
-       void                            *intent)
-{
-       xfs_bui_release(intent);
-}
-
-/* Cancel a deferred rmap update. */
-STATIC void
-xfs_bmap_update_cancel_item(
-       struct list_head                *item)
-{
-       struct xfs_bmap_intent          *bmap;
-
-       bmap = container_of(item, struct xfs_bmap_intent, bi_list);
-       kmem_free(bmap);
-}
-
-const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
-       .max_items      = XFS_BUI_MAX_FAST_EXTENTS,
-       .diff_items     = xfs_bmap_update_diff_items,
-       .create_intent  = xfs_bmap_update_create_intent,
-       .abort_intent   = xfs_bmap_update_abort_intent,
-       .log_item       = xfs_bmap_update_log_item,
-       .create_done    = xfs_bmap_update_create_done,
-       .finish_item    = xfs_bmap_update_finish_item,
-       .cancel_item    = xfs_bmap_update_cancel_item,
-};
index 7d65ebf1e847a9c07c0fbb8178b26892ad3390e2..b5b3a78ef31c41e0f057d8f8ab752a21ec8e910e 100644 (file)
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
-#include "xfs_error.h"
 #include "xfs_trace.h"
 
 /*
@@ -174,8 +172,7 @@ xfs_trans_get_buf_map(
 xfs_buf_t *
 xfs_trans_getsb(
        xfs_trans_t             *tp,
-       struct xfs_mount        *mp,
-       int                     flags)
+       struct xfs_mount        *mp)
 {
        xfs_buf_t               *bp;
        struct xfs_buf_log_item *bip;
@@ -185,7 +182,7 @@ xfs_trans_getsb(
         * if tp is NULL.
         */
        if (tp == NULL)
-               return xfs_getsb(mp, flags);
+               return xfs_getsb(mp);
 
        /*
         * If the superblock buffer already has this transaction
@@ -203,7 +200,7 @@ xfs_trans_getsb(
                return bp;
        }
 
-       bp = xfs_getsb(mp, flags);
+       bp = xfs_getsb(mp);
        if (bp == NULL)
                return NULL;
 
@@ -428,7 +425,7 @@ xfs_trans_brelse(
 
 /*
  * Mark the buffer as not needing to be unlocked when the buf item's
- * iop_unlock() routine is called.  The buffer must already be locked
+ * iop_committing() routine is called.  The buffer must already be locked
  * and associated with the given transaction.
  */
 /* ARGSUSED */
index cd664a03613fed2bb504675e009574a9b07ad5a9..1027c9ca6eb8a0e0adb8bb1ed558e5eab6115e08 100644 (file)
@@ -11,7 +11,6 @@
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_inode.h"
-#include "xfs_error.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_quota.h"
@@ -29,7 +28,6 @@ xfs_trans_dqjoin(
        xfs_trans_t     *tp,
        xfs_dquot_t     *dqp)
 {
-       ASSERT(dqp->q_transp != tp);
        ASSERT(XFS_DQ_IS_LOCKED(dqp));
        ASSERT(dqp->q_logitem.qli_dquot == dqp);
 
@@ -37,15 +35,8 @@ xfs_trans_dqjoin(
         * Get a log_item_desc to point at the new item.
         */
        xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
-
-       /*
-        * Initialize d_transp so we can later determine if this dquot is
-        * associated with this transaction.
-        */
-       dqp->q_transp = tp;
 }
 
-
 /*
  * This is called to mark the dquot as needing
  * to be logged when the transaction is committed.  The dquot must
@@ -61,7 +52,6 @@ xfs_trans_log_dquot(
        xfs_trans_t     *tp,
        xfs_dquot_t     *dqp)
 {
-       ASSERT(dqp->q_transp == tp);
        ASSERT(XFS_DQ_IS_LOCKED(dqp));
 
        tp->t_flags |= XFS_TRANS_DIRTY;
@@ -347,7 +337,6 @@ xfs_trans_apply_dquot_deltas(
                                break;
 
                        ASSERT(XFS_DQ_IS_LOCKED(dqp));
-                       ASSERT(dqp->q_transp == tp);
 
                        /*
                         * adjust the actual number of blocks used
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
deleted file mode 100644 (file)
index 8ee7a3f..0000000
+++ /dev/null
@@ -1,286 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_extfree_item.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_trace.h"
-
-/*
- * This routine is called to allocate an "extent free done"
- * log item that will hold nextents worth of extents.  The
- * caller must use all nextents extents, because we are not
- * flexible about this at all.
- */
-struct xfs_efd_log_item *
-xfs_trans_get_efd(struct xfs_trans             *tp,
-                 struct xfs_efi_log_item       *efip,
-                 uint                          nextents)
-{
-       struct xfs_efd_log_item                 *efdp;
-
-       ASSERT(tp != NULL);
-       ASSERT(nextents > 0);
-
-       efdp = xfs_efd_init(tp->t_mountp, efip, nextents);
-       ASSERT(efdp != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &efdp->efd_item);
-       return efdp;
-}
-
-/*
- * Free an extent and log it to the EFD. Note that the transaction is marked
- * dirty regardless of whether the extent free succeeds or fails to support the
- * EFI/EFD lifecycle rules.
- */
-int
-xfs_trans_free_extent(
-       struct xfs_trans                *tp,
-       struct xfs_efd_log_item         *efdp,
-       xfs_fsblock_t                   start_block,
-       xfs_extlen_t                    ext_len,
-       const struct xfs_owner_info     *oinfo,
-       bool                            skip_discard)
-{
-       struct xfs_mount                *mp = tp->t_mountp;
-       struct xfs_extent               *extp;
-       uint                            next_extent;
-       xfs_agnumber_t                  agno = XFS_FSB_TO_AGNO(mp, start_block);
-       xfs_agblock_t                   agbno = XFS_FSB_TO_AGBNO(mp,
-                                                               start_block);
-       int                             error;
-
-       trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
-
-       error = __xfs_free_extent(tp, start_block, ext_len,
-                                 oinfo, XFS_AG_RESV_NONE, skip_discard);
-       /*
-        * Mark the transaction dirty, even on error. This ensures the
-        * transaction is aborted, which:
-        *
-        * 1.) releases the EFI and frees the EFD
-        * 2.) shuts down the filesystem
-        */
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
-
-       next_extent = efdp->efd_next_extent;
-       ASSERT(next_extent < efdp->efd_format.efd_nextents);
-       extp = &(efdp->efd_format.efd_extents[next_extent]);
-       extp->ext_start = start_block;
-       extp->ext_len = ext_len;
-       efdp->efd_next_extent++;
-
-       return error;
-}
-
-/* Sort bmap items by AG. */
-static int
-xfs_extent_free_diff_items(
-       void                            *priv,
-       struct list_head                *a,
-       struct list_head                *b)
-{
-       struct xfs_mount                *mp = priv;
-       struct xfs_extent_free_item     *ra;
-       struct xfs_extent_free_item     *rb;
-
-       ra = container_of(a, struct xfs_extent_free_item, xefi_list);
-       rb = container_of(b, struct xfs_extent_free_item, xefi_list);
-       return  XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
-               XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
-}
-
-/* Get an EFI. */
-STATIC void *
-xfs_extent_free_create_intent(
-       struct xfs_trans                *tp,
-       unsigned int                    count)
-{
-       struct xfs_efi_log_item         *efip;
-
-       ASSERT(tp != NULL);
-       ASSERT(count > 0);
-
-       efip = xfs_efi_init(tp->t_mountp, count);
-       ASSERT(efip != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &efip->efi_item);
-       return efip;
-}
-
-/* Log a free extent to the intent item. */
-STATIC void
-xfs_extent_free_log_item(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       struct list_head                *item)
-{
-       struct xfs_efi_log_item         *efip = intent;
-       struct xfs_extent_free_item     *free;
-       uint                            next_extent;
-       struct xfs_extent               *extp;
-
-       free = container_of(item, struct xfs_extent_free_item, xefi_list);
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
-
-       /*
-        * atomic_inc_return gives us the value after the increment;
-        * we want to use it as an array index so we need to subtract 1 from
-        * it.
-        */
-       next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
-       ASSERT(next_extent < efip->efi_format.efi_nextents);
-       extp = &efip->efi_format.efi_extents[next_extent];
-       extp->ext_start = free->xefi_startblock;
-       extp->ext_len = free->xefi_blockcount;
-}
-
-/* Get an EFD so we can process all the free extents. */
-STATIC void *
-xfs_extent_free_create_done(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       unsigned int                    count)
-{
-       return xfs_trans_get_efd(tp, intent, count);
-}
-
-/* Process a free extent. */
-STATIC int
-xfs_extent_free_finish_item(
-       struct xfs_trans                *tp,
-       struct list_head                *item,
-       void                            *done_item,
-       void                            **state)
-{
-       struct xfs_extent_free_item     *free;
-       int                             error;
-
-       free = container_of(item, struct xfs_extent_free_item, xefi_list);
-       error = xfs_trans_free_extent(tp, done_item,
-                       free->xefi_startblock,
-                       free->xefi_blockcount,
-                       &free->xefi_oinfo, free->xefi_skip_discard);
-       kmem_free(free);
-       return error;
-}
-
-/* Abort all pending EFIs. */
-STATIC void
-xfs_extent_free_abort_intent(
-       void                            *intent)
-{
-       xfs_efi_release(intent);
-}
-
-/* Cancel a free extent. */
-STATIC void
-xfs_extent_free_cancel_item(
-       struct list_head                *item)
-{
-       struct xfs_extent_free_item     *free;
-
-       free = container_of(item, struct xfs_extent_free_item, xefi_list);
-       kmem_free(free);
-}
-
-const struct xfs_defer_op_type xfs_extent_free_defer_type = {
-       .max_items      = XFS_EFI_MAX_FAST_EXTENTS,
-       .diff_items     = xfs_extent_free_diff_items,
-       .create_intent  = xfs_extent_free_create_intent,
-       .abort_intent   = xfs_extent_free_abort_intent,
-       .log_item       = xfs_extent_free_log_item,
-       .create_done    = xfs_extent_free_create_done,
-       .finish_item    = xfs_extent_free_finish_item,
-       .cancel_item    = xfs_extent_free_cancel_item,
-};
-
-/*
- * AGFL blocks are accounted differently in the reserve pools and are not
- * inserted into the busy extent list.
- */
-STATIC int
-xfs_agfl_free_finish_item(
-       struct xfs_trans                *tp,
-       struct list_head                *item,
-       void                            *done_item,
-       void                            **state)
-{
-       struct xfs_mount                *mp = tp->t_mountp;
-       struct xfs_efd_log_item         *efdp = done_item;
-       struct xfs_extent_free_item     *free;
-       struct xfs_extent               *extp;
-       struct xfs_buf                  *agbp;
-       int                             error;
-       xfs_agnumber_t                  agno;
-       xfs_agblock_t                   agbno;
-       uint                            next_extent;
-
-       free = container_of(item, struct xfs_extent_free_item, xefi_list);
-       ASSERT(free->xefi_blockcount == 1);
-       agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
-       agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
-
-       trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
-
-       error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
-       if (!error)
-               error = xfs_free_agfl_block(tp, agno, agbno, agbp,
-                                           &free->xefi_oinfo);
-
-       /*
-        * Mark the transaction dirty, even on error. This ensures the
-        * transaction is aborted, which:
-        *
-        * 1.) releases the EFI and frees the EFD
-        * 2.) shuts down the filesystem
-        */
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
-
-       next_extent = efdp->efd_next_extent;
-       ASSERT(next_extent < efdp->efd_format.efd_nextents);
-       extp = &(efdp->efd_format.efd_extents[next_extent]);
-       extp->ext_start = free->xefi_startblock;
-       extp->ext_len = free->xefi_blockcount;
-       efdp->efd_next_extent++;
-
-       kmem_free(free);
-       return error;
-}
-
-
-/* sub-type with special handling for AGFL deferred frees */
-const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
-       .max_items      = XFS_EFI_MAX_FAST_EXTENTS,
-       .diff_items     = xfs_extent_free_diff_items,
-       .create_intent  = xfs_extent_free_create_intent,
-       .abort_intent   = xfs_extent_free_abort_intent,
-       .log_item       = xfs_extent_free_log_item,
-       .create_done    = xfs_extent_free_create_done,
-       .finish_item    = xfs_agfl_free_finish_item,
-       .cancel_item    = xfs_extent_free_cancel_item,
-};
index 542927321a61b5e2ff52b30faca74dac268c1722..93d14e47269d1f1cbc18906de9edbb6ab18dadd4 100644 (file)
@@ -8,13 +8,10 @@
 #include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
 #include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_inode_item.h"
-#include "xfs_trace.h"
 
 #include <linux/iversion.h>
 
index 091eae9f4e7434e7d40364567dc98e84aa177060..2e073c1c4614f2a79cc9452854da1cead65fb06c 100644 (file)
@@ -16,12 +16,10 @@ struct xfs_log_vec;
 void   xfs_trans_init(struct xfs_mount *);
 void   xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
 void   xfs_trans_del_item(struct xfs_log_item *);
-void   xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
-                               bool abort);
 void   xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
 
 void   xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
-                               xfs_lsn_t commit_lsn, int aborted);
+                               xfs_lsn_t commit_lsn, bool aborted);
 /*
  * AIL traversal cursor.
  *
diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c
deleted file mode 100644 (file)
index 8d73472..0000000
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle.  All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_refcount_item.h"
-#include "xfs_alloc.h"
-#include "xfs_refcount.h"
-
-/*
- * This routine is called to allocate a "refcount update done"
- * log item.
- */
-struct xfs_cud_log_item *
-xfs_trans_get_cud(
-       struct xfs_trans                *tp,
-       struct xfs_cui_log_item         *cuip)
-{
-       struct xfs_cud_log_item         *cudp;
-
-       cudp = xfs_cud_init(tp->t_mountp, cuip);
-       xfs_trans_add_item(tp, &cudp->cud_item);
-       return cudp;
-}
-
-/*
- * Finish an refcount update and log it to the CUD. Note that the
- * transaction is marked dirty regardless of whether the refcount
- * update succeeds or fails to support the CUI/CUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_refcount_update(
-       struct xfs_trans                *tp,
-       struct xfs_cud_log_item         *cudp,
-       enum xfs_refcount_intent_type   type,
-       xfs_fsblock_t                   startblock,
-       xfs_extlen_t                    blockcount,
-       xfs_fsblock_t                   *new_fsb,
-       xfs_extlen_t                    *new_len,
-       struct xfs_btree_cur            **pcur)
-{
-       int                             error;
-
-       error = xfs_refcount_finish_one(tp, type, startblock,
-                       blockcount, new_fsb, new_len, pcur);
-
-       /*
-        * Mark the transaction dirty, even on error. This ensures the
-        * transaction is aborted, which:
-        *
-        * 1.) releases the CUI and frees the CUD
-        * 2.) shuts down the filesystem
-        */
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
-
-       return error;
-}
-
-/* Sort refcount intents by AG. */
-static int
-xfs_refcount_update_diff_items(
-       void                            *priv,
-       struct list_head                *a,
-       struct list_head                *b)
-{
-       struct xfs_mount                *mp = priv;
-       struct xfs_refcount_intent      *ra;
-       struct xfs_refcount_intent      *rb;
-
-       ra = container_of(a, struct xfs_refcount_intent, ri_list);
-       rb = container_of(b, struct xfs_refcount_intent, ri_list);
-       return  XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
-               XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
-}
-
-/* Get an CUI. */
-STATIC void *
-xfs_refcount_update_create_intent(
-       struct xfs_trans                *tp,
-       unsigned int                    count)
-{
-       struct xfs_cui_log_item         *cuip;
-
-       ASSERT(tp != NULL);
-       ASSERT(count > 0);
-
-       cuip = xfs_cui_init(tp->t_mountp, count);
-       ASSERT(cuip != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &cuip->cui_item);
-       return cuip;
-}
-
-/* Set the phys extent flags for this reverse mapping. */
-static void
-xfs_trans_set_refcount_flags(
-       struct xfs_phys_extent          *refc,
-       enum xfs_refcount_intent_type   type)
-{
-       refc->pe_flags = 0;
-       switch (type) {
-       case XFS_REFCOUNT_INCREASE:
-       case XFS_REFCOUNT_DECREASE:
-       case XFS_REFCOUNT_ALLOC_COW:
-       case XFS_REFCOUNT_FREE_COW:
-               refc->pe_flags |= type;
-               break;
-       default:
-               ASSERT(0);
-       }
-}
-
-/* Log refcount updates in the intent item. */
-STATIC void
-xfs_refcount_update_log_item(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       struct list_head                *item)
-{
-       struct xfs_cui_log_item         *cuip = intent;
-       struct xfs_refcount_intent      *refc;
-       uint                            next_extent;
-       struct xfs_phys_extent          *ext;
-
-       refc = container_of(item, struct xfs_refcount_intent, ri_list);
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
-
-       /*
-        * atomic_inc_return gives us the value after the increment;
-        * we want to use it as an array index so we need to subtract 1 from
-        * it.
-        */
-       next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
-       ASSERT(next_extent < cuip->cui_format.cui_nextents);
-       ext = &cuip->cui_format.cui_extents[next_extent];
-       ext->pe_startblock = refc->ri_startblock;
-       ext->pe_len = refc->ri_blockcount;
-       xfs_trans_set_refcount_flags(ext, refc->ri_type);
-}
-
-/* Get an CUD so we can process all the deferred refcount updates. */
-STATIC void *
-xfs_refcount_update_create_done(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       unsigned int                    count)
-{
-       return xfs_trans_get_cud(tp, intent);
-}
-
-/* Process a deferred refcount update. */
-STATIC int
-xfs_refcount_update_finish_item(
-       struct xfs_trans                *tp,
-       struct list_head                *item,
-       void                            *done_item,
-       void                            **state)
-{
-       struct xfs_refcount_intent      *refc;
-       xfs_fsblock_t                   new_fsb;
-       xfs_extlen_t                    new_aglen;
-       int                             error;
-
-       refc = container_of(item, struct xfs_refcount_intent, ri_list);
-       error = xfs_trans_log_finish_refcount_update(tp, done_item,
-                       refc->ri_type,
-                       refc->ri_startblock,
-                       refc->ri_blockcount,
-                       &new_fsb, &new_aglen,
-                       (struct xfs_btree_cur **)state);
-       /* Did we run out of reservation?  Requeue what we didn't finish. */
-       if (!error && new_aglen > 0) {
-               ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
-                      refc->ri_type == XFS_REFCOUNT_DECREASE);
-               refc->ri_startblock = new_fsb;
-               refc->ri_blockcount = new_aglen;
-               return -EAGAIN;
-       }
-       kmem_free(refc);
-       return error;
-}
-
-/* Clean up after processing deferred refcounts. */
-STATIC void
-xfs_refcount_update_finish_cleanup(
-       struct xfs_trans        *tp,
-       void                    *state,
-       int                     error)
-{
-       struct xfs_btree_cur    *rcur = state;
-
-       xfs_refcount_finish_one_cleanup(tp, rcur, error);
-}
-
-/* Abort all pending CUIs. */
-STATIC void
-xfs_refcount_update_abort_intent(
-       void                            *intent)
-{
-       xfs_cui_release(intent);
-}
-
-/* Cancel a deferred refcount update. */
-STATIC void
-xfs_refcount_update_cancel_item(
-       struct list_head                *item)
-{
-       struct xfs_refcount_intent      *refc;
-
-       refc = container_of(item, struct xfs_refcount_intent, ri_list);
-       kmem_free(refc);
-}
-
-const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
-       .max_items      = XFS_CUI_MAX_FAST_EXTENTS,
-       .diff_items     = xfs_refcount_update_diff_items,
-       .create_intent  = xfs_refcount_update_create_intent,
-       .abort_intent   = xfs_refcount_update_abort_intent,
-       .log_item       = xfs_refcount_update_log_item,
-       .create_done    = xfs_refcount_update_create_done,
-       .finish_item    = xfs_refcount_update_finish_item,
-       .finish_cleanup = xfs_refcount_update_finish_cleanup,
-       .cancel_item    = xfs_refcount_update_cancel_item,
-};
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
deleted file mode 100644 (file)
index 5c7936b..0000000
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle.  All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_rmap_item.h"
-#include "xfs_alloc.h"
-#include "xfs_rmap.h"
-
-/* Set the map extent flags for this reverse mapping. */
-static void
-xfs_trans_set_rmap_flags(
-       struct xfs_map_extent           *rmap,
-       enum xfs_rmap_intent_type       type,
-       int                             whichfork,
-       xfs_exntst_t                    state)
-{
-       rmap->me_flags = 0;
-       if (state == XFS_EXT_UNWRITTEN)
-               rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
-       if (whichfork == XFS_ATTR_FORK)
-               rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
-       switch (type) {
-       case XFS_RMAP_MAP:
-               rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
-               break;
-       case XFS_RMAP_MAP_SHARED:
-               rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
-               break;
-       case XFS_RMAP_UNMAP:
-               rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
-               break;
-       case XFS_RMAP_UNMAP_SHARED:
-               rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
-               break;
-       case XFS_RMAP_CONVERT:
-               rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
-               break;
-       case XFS_RMAP_CONVERT_SHARED:
-               rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
-               break;
-       case XFS_RMAP_ALLOC:
-               rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
-               break;
-       case XFS_RMAP_FREE:
-               rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
-               break;
-       default:
-               ASSERT(0);
-       }
-}
-
-struct xfs_rud_log_item *
-xfs_trans_get_rud(
-       struct xfs_trans                *tp,
-       struct xfs_rui_log_item         *ruip)
-{
-       struct xfs_rud_log_item         *rudp;
-
-       rudp = xfs_rud_init(tp->t_mountp, ruip);
-       xfs_trans_add_item(tp, &rudp->rud_item);
-       return rudp;
-}
-
-/*
- * Finish an rmap update and log it to the RUD. Note that the transaction is
- * marked dirty regardless of whether the rmap update succeeds or fails to
- * support the RUI/RUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_rmap_update(
-       struct xfs_trans                *tp,
-       struct xfs_rud_log_item         *rudp,
-       enum xfs_rmap_intent_type       type,
-       uint64_t                        owner,
-       int                             whichfork,
-       xfs_fileoff_t                   startoff,
-       xfs_fsblock_t                   startblock,
-       xfs_filblks_t                   blockcount,
-       xfs_exntst_t                    state,
-       struct xfs_btree_cur            **pcur)
-{
-       int                             error;
-
-       error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
-                       startblock, blockcount, state, pcur);
-
-       /*
-        * Mark the transaction dirty, even on error. This ensures the
-        * transaction is aborted, which:
-        *
-        * 1.) releases the RUI and frees the RUD
-        * 2.) shuts down the filesystem
-        */
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
-
-       return error;
-}
-
-/* Sort rmap intents by AG. */
-static int
-xfs_rmap_update_diff_items(
-       void                            *priv,
-       struct list_head                *a,
-       struct list_head                *b)
-{
-       struct xfs_mount                *mp = priv;
-       struct xfs_rmap_intent          *ra;
-       struct xfs_rmap_intent          *rb;
-
-       ra = container_of(a, struct xfs_rmap_intent, ri_list);
-       rb = container_of(b, struct xfs_rmap_intent, ri_list);
-       return  XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
-               XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
-}
-
-/* Get an RUI. */
-STATIC void *
-xfs_rmap_update_create_intent(
-       struct xfs_trans                *tp,
-       unsigned int                    count)
-{
-       struct xfs_rui_log_item         *ruip;
-
-       ASSERT(tp != NULL);
-       ASSERT(count > 0);
-
-       ruip = xfs_rui_init(tp->t_mountp, count);
-       ASSERT(ruip != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &ruip->rui_item);
-       return ruip;
-}
-
-/* Log rmap updates in the intent item. */
-STATIC void
-xfs_rmap_update_log_item(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       struct list_head                *item)
-{
-       struct xfs_rui_log_item         *ruip = intent;
-       struct xfs_rmap_intent          *rmap;
-       uint                            next_extent;
-       struct xfs_map_extent           *map;
-
-       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
-
-       /*
-        * atomic_inc_return gives us the value after the increment;
-        * we want to use it as an array index so we need to subtract 1 from
-        * it.
-        */
-       next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
-       ASSERT(next_extent < ruip->rui_format.rui_nextents);
-       map = &ruip->rui_format.rui_extents[next_extent];
-       map->me_owner = rmap->ri_owner;
-       map->me_startblock = rmap->ri_bmap.br_startblock;
-       map->me_startoff = rmap->ri_bmap.br_startoff;
-       map->me_len = rmap->ri_bmap.br_blockcount;
-       xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
-                       rmap->ri_bmap.br_state);
-}
-
-/* Get an RUD so we can process all the deferred rmap updates. */
-STATIC void *
-xfs_rmap_update_create_done(
-       struct xfs_trans                *tp,
-       void                            *intent,
-       unsigned int                    count)
-{
-       return xfs_trans_get_rud(tp, intent);
-}
-
-/* Process a deferred rmap update. */
-STATIC int
-xfs_rmap_update_finish_item(
-       struct xfs_trans                *tp,
-       struct list_head                *item,
-       void                            *done_item,
-       void                            **state)
-{
-       struct xfs_rmap_intent          *rmap;
-       int                             error;
-
-       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
-       error = xfs_trans_log_finish_rmap_update(tp, done_item,
-                       rmap->ri_type,
-                       rmap->ri_owner, rmap->ri_whichfork,
-                       rmap->ri_bmap.br_startoff,
-                       rmap->ri_bmap.br_startblock,
-                       rmap->ri_bmap.br_blockcount,
-                       rmap->ri_bmap.br_state,
-                       (struct xfs_btree_cur **)state);
-       kmem_free(rmap);
-       return error;
-}
-
-/* Clean up after processing deferred rmaps. */
-STATIC void
-xfs_rmap_update_finish_cleanup(
-       struct xfs_trans        *tp,
-       void                    *state,
-       int                     error)
-{
-       struct xfs_btree_cur    *rcur = state;
-
-       xfs_rmap_finish_one_cleanup(tp, rcur, error);
-}
-
-/* Abort all pending RUIs. */
-STATIC void
-xfs_rmap_update_abort_intent(
-       void                            *intent)
-{
-       xfs_rui_release(intent);
-}
-
-/* Cancel a deferred rmap update. */
-STATIC void
-xfs_rmap_update_cancel_item(
-       struct list_head                *item)
-{
-       struct xfs_rmap_intent          *rmap;
-
-       rmap = container_of(item, struct xfs_rmap_intent, ri_list);
-       kmem_free(rmap);
-}
-
-const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
-       .max_items      = XFS_RUI_MAX_FAST_EXTENTS,
-       .diff_items     = xfs_rmap_update_diff_items,
-       .create_intent  = xfs_rmap_update_create_intent,
-       .abort_intent   = xfs_rmap_update_abort_intent,
-       .log_item       = xfs_rmap_update_log_item,
-       .create_done    = xfs_rmap_update_create_done,
-       .finish_item    = xfs_rmap_update_finish_item,
-       .finish_cleanup = xfs_rmap_update_finish_cleanup,
-       .cancel_item    = xfs_rmap_update_cancel_item,
-};
index 9a63016009a1394f41beaff8323a5568b6ceab22..3123b5aaad2a15ef3652892372c7808ebc091dd5 100644 (file)
@@ -5,15 +5,12 @@
  */
 
 #include "xfs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
 #include "xfs_da_format.h"
 #include "xfs_inode.h"
 #include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_acl.h"
 
 #include <linux/posix_acl_xattr.h>
 #include <linux/xattr.h>