Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2011 16:57:40 +0000 (09:57 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2011 16:57:41 +0000 (09:57 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (43 commits)
  ext4: fix a BUG in mb_mark_used during trim.
  ext4: unused variables cleanup in fs/ext4/extents.c
  ext4: remove redundant set_buffer_mapped() in ext4_da_get_block_prep()
  ext4: add more tracepoints and use dev_t in the trace buffer
  ext4: don't kfree uninitialized s_group_info members
  ext4: add missing space in printk's in __ext4_grp_locked_error()
  ext4: add FITRIM to compat_ioctl.
  ext4: handle errors in ext4_clear_blocks()
  ext4: unify the ext4_handle_release_buffer() api
  ext4: handle errors in ext4_rename
  jbd2: add COW fields to struct jbd2_journal_handle
  jbd2: add the b_cow_tid field to journal_head struct
  ext4: Initialize fsync transaction ids in ext4_new_inode()
  ext4: Use single thread to perform DIO unwritten conversion
  ext4: optimize ext4_bio_write_page() when no extent conversion is needed
  ext4: skip orphan cleanup if fs has unknown ROCOMPAT features
  ext4: use the nblocks arg to ext4_truncate_restart_trans()
  ext4: fix missing iput of root inode for some mount error paths
  ext4: make FIEMAP and delayed allocation play well together
  ext4: suppress verbose debugging information if malloc-debug is off
  ...

Fix up conflicts in fs/ext4/super.c due to workqueue changes

21 files changed:
Documentation/ABI/testing/sysfs-fs-ext4
Documentation/filesystems/ext4.txt
fs/ext4/balloc.c
fs/ext4/ext4_jbd2.h
fs/ext4/extents.c
fs/ext4/fsync.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/mballoc.h
fs/ext4/migrate.c
fs/ext4/namei.c
fs/ext4/page-io.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/xattr.c
include/linux/jbd2.h
include/linux/journal-head.h
include/trace/events/ext4.h
include/trace/events/jbd2.h

index 5fb709997d9635ac4f5ee1d8bed00f207b9c3feb..f22ac0872ae8de41a44406edfab71384d54b830a 100644 (file)
@@ -48,7 +48,7 @@ Description:
                 will have its blocks allocated out of its own unique
                 preallocation pool.
 
-What:          /sys/fs/ext4/<disk>/inode_readahead
+What:          /sys/fs/ext4/<disk>/inode_readahead_blks
 Date:          March 2008
 Contact:       "Theodore Ts'o" <tytso@mit.edu>
 Description:
@@ -85,7 +85,14 @@ Date:                June 2008
 Contact:       "Theodore Ts'o" <tytso@mit.edu>
 Description:
                Tuning parameter which (if non-zero) controls the goal
-               inode used by the inode allocator in p0reference to
-               all other allocation hueristics.  This is intended for
+               inode used by the inode allocator in preference to
+               all other allocation heuristics.  This is intended for
                debugging use only, and should be 0 on production
                systems.
+
+What:          /sys/fs/ext4/<disk>/max_writeback_mb_bump
+Date:          September 2009
+Contact:       "Theodore Ts'o" <tytso@mit.edu>
+Description:
+               The maximum number of megabytes the writeback code will
+               try to write out before moving on to another inode.
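
The attributes above are ordinary sysfs text files, so a tool can read them
with plain file I/O. A minimal userspace sketch that prints
inode_readahead_blks (the device name sdb1 is a placeholder for whatever
block device backs the filesystem):

/* Sketch: read an ext4 sysfs tunable from userspace.
 * "sdb1" is a placeholder; substitute the device backing your filesystem.
 */
#include <stdio.h>

int main(void)
{
        char buf[64];
        FILE *f = fopen("/sys/fs/ext4/sdb1/inode_readahead_blks", "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("inode_readahead_blks = %s", buf);
        fclose(f);
        return 0;
}
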
index 6ab9442d7eeb666e496e79acba6471b813207434..6b050464a90de62d1bd4b5c3d372baa044f36d16 100644 (file)
@@ -367,12 +367,47 @@ init_itable=n             The lazy itable init code will wait n times the
                        minimizes the impact on the systme performance
                        while file system's inode table is being initialized.
 
-discard                Controls whether ext4 should issue discard/TRIM
+discard                        Controls whether ext4 should issue discard/TRIM
 nodiscard(*)           commands to the underlying block device when
                        blocks are freed.  This is useful for SSD devices
                        and sparse/thinly-provisioned LUNs, but it is off
                        by default until sufficient testing has been done.
 
+nouid32                        Disables 32-bit UIDs and GIDs.  This is for
+                       interoperability  with  older kernels which only
+                       store and expect 16-bit values.
+
+resize                 Allows resizing the filesystem to the end of the
+                       last existing block group; any further resizing has
+                       to be done with resize2fs, either online or offline.
+                       It can only be used in conjunction with remount.
+
+block_validity         This option enables/disables the in-kernel facility
+noblock_validity       for tracking filesystem metadata blocks within
+                       internal data structures. This allows the multi-block
+                       allocator and other routines to quickly locate extents
+                       which might overlap with filesystem metadata blocks.
+                       This option is intended for debugging purposes and,
+                       since it negatively affects performance, it is off
+                       by default.
+
+dioread_lock           Controls whether or not ext4 should use DIO read
+dioread_nolock         locking. If the dioread_nolock option is specified,
+                       ext4 will allocate an uninitialized extent before a
+                       buffer write and convert the extent to initialized
+                       after the IO completes. This approach allows the
+                       ext4 code to avoid using the inode mutex, which
+                       improves scalability on high-speed storage. However,
+                       this does not work with the nobh option, and the
+                       mount will fail. Nor does it work with data
+                       journaling; the dioread_nolock option will be ignored
+                       with a kernel warning. Note that the dioread_nolock
+                       code path is only used for extent-based files. Because
+                       of these restrictions, it is off by default
+                       (i.e. dioread_lock).
+
+i_version              Enable 64-bit inode version support. This option is
+                       off by default.
+
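
These options are normally passed via "mount -o"; from a program, the same
option string goes through the data argument of mount(2). A minimal sketch,
assuming a hypothetical device /dev/sdb1 and mount point /mnt/scratch:

/* Sketch: mount an ext4 filesystem with dioread_nolock from C.
 * /dev/sdb1 and /mnt/scratch are placeholder names; run as root.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/mount.h>

int main(void)
{
        /* The last argument carries the comma-separated ext4 options,
         * exactly as they would appear after "-o" on the command line. */
        if (mount("/dev/sdb1", "/mnt/scratch", "ext4", 0,
                  "dioread_nolock") < 0) {
                fprintf(stderr, "mount failed: %s\n", strerror(errno));
                return 1;
        }
        return 0;
}
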
 Data Mode
 =========
 There are 3 different data modes:
@@ -400,6 +435,176 @@ needs to be read from and written to disk at the same time where it
 outperforms all others modes.  Currently ext4 does not have delayed
 allocation support if this data journalling mode is selected.
 
+/proc entries
+=============
+
+Information about mounted ext4 file systems can be found in
+/proc/fs/ext4.  Each mounted filesystem will have a directory in
+/proc/fs/ext4 based on its device name (e.g., /proc/fs/ext4/hdc or
+/proc/fs/ext4/dm-0).   The files in each per-device directory are shown
+in the table below.
+
+Files in /proc/fs/ext4/<devname>
+..............................................................................
+ File            Content
+ mb_groups       details of multiblock allocator buddy cache of free blocks
+..............................................................................
+
+/sys entries
+============
+
+Information about mounted ext4 file systems can be found in
+/sys/fs/ext4.  Each mounted filesystem will have a directory in
+/sys/fs/ext4 based on its device name (e.g., /sys/fs/ext4/hdc or
+/sys/fs/ext4/dm-0).   The files in each per-device directory are shown
+in the table below.
+
+Files in /sys/fs/ext4/<devname>
+(see also Documentation/ABI/testing/sysfs-fs-ext4)
+..............................................................................
+ File                         Content
+
+ delayed_allocation_blocks    This file is read-only and shows the number of
+                              blocks that are dirty in the page cache, but
+                              which do not have their location in the
+                              filesystem allocated yet.
+
+ inode_goal                   Tuning parameter which (if non-zero) controls
+                              the goal inode used by the inode allocator in
+                              preference to all other allocation heuristics.
+                              This is intended for debugging use only, and
+                              should be 0 on production systems.
+
+ inode_readahead_blks         Tuning parameter which controls the maximum
+                              number of inode table blocks that ext4's inode
+                              table readahead algorithm will pre-read into
+                              the buffer cache
+
+ lifetime_write_kbytes        This file is read-only and shows the number of
+                              kilobytes of data that have been written to this
+                              filesystem since it was created.
+
+ max_writeback_mb_bump        The maximum number of megabytes the writeback
+                              code will try to write out before moving on to
+                              another inode.
+
+ mb_group_prealloc            The multiblock allocator will round up allocation
+                              requests to a multiple of this tuning parameter if
+                              the stripe size is not set in the ext4 superblock
+
+ mb_max_to_scan               The maximum number of extents the multiblock
+                              allocator will search to find the best extent
+
+ mb_min_to_scan               The minimum number of extents the multiblock
+                              allocator will search to find the best extent
+
+ mb_order2_req                Tuning parameter which controls the minimum size
+                              for requests (as a power of 2) where the buddy
+                              cache is used
+
+ mb_stats                     Controls whether the multiblock allocator should
+                              collect statistics, which are shown during the
+                              unmount. 1 means to collect statistics, 0 means
+                              not to collect statistics
+
+ mb_stream_req                Files which have fewer blocks than this tunable
+                              parameter will have their blocks allocated out
+                              of a block group specific preallocation pool, so
+                              that small files are packed closely together.
+                              Each large file will have its blocks allocated
+                              out of its own unique preallocation pool.
+
+ session_write_kbytes         This file is read-only and shows the number of
+                              kilobytes of data that have been written to this
+                              filesystem since it was mounted.
+..............................................................................
+
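
The writable tunables above take a decimal value written into the
corresponding file. A minimal sketch that adjusts mb_stream_req (the device
name sdb1 and the value 32 are placeholders; writing requires root):

/* Sketch: tune an ext4 sysfs knob from userspace.
 * "sdb1" and the value 32 are placeholders; run as root.
 */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/fs/ext4/sdb1/mb_stream_req", "w");

        if (!f) {
                perror("fopen");
                return 1;
        }
        fprintf(f, "32\n");
        return fclose(f) ? 1 : 0;
}
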
+Ioctls
+======
+
+There is some Ext4 specific functionality which can be accessed by applications
+through the system call interfaces. The list of all Ext4 specific ioctls is
+shown in the table below.
+
+Table of Ext4 specific ioctls
+..............................................................................
+ Ioctl                       Description
+ EXT4_IOC_GETFLAGS           Get additional attributes associated with the inode.
+                             The ioctl argument is an integer bitfield, with
+                             bit values described in ext4.h. This ioctl is an
+                             alias for FS_IOC_GETFLAGS.
+
+ EXT4_IOC_SETFLAGS           Set additional attributes associated with the inode.
+                             The ioctl argument is an integer bitfield, with
+                             bit values described in ext4.h. This ioctl is an
+                             alias for FS_IOC_SETFLAGS.
+
+ EXT4_IOC_GETVERSION
+ EXT4_IOC_GETVERSION_OLD
+                             Get the inode i_generation number stored for
+                             each inode. The i_generation number is normally
+                             changed only when a new inode is created and it is
+                             particularly useful for network filesystems. The
+                             '_OLD' version of this ioctl is an alias for
+                             FS_IOC_GETVERSION.
+
+ EXT4_IOC_SETVERSION
+ EXT4_IOC_SETVERSION_OLD
+                             Set the inode i_generation number stored for
+                             each inode. The '_OLD' version of this ioctl
+                             is an alias for FS_IOC_SETVERSION.
+
+ EXT4_IOC_GROUP_EXTEND       This ioctl has the same purpose as the resize
+                             mount option. It allows resizing the filesystem
+                             to the end of the last existing block group;
+                             any further resizing has to be done with
+                             resize2fs, either online or offline. The
+                             argument points to the unsigned long number
+                             representing the filesystem's new block count.
+
+ EXT4_IOC_MOVE_EXT           Move the block extents from orig_fd (the one
+                             this ioctl is pointing to) to the donor_fd (the
+                             one specified in move_extent structure passed
+                             as an argument to this ioctl). Then, exchange
+                             inode metadata between orig_fd and donor_fd.
+                             This is especially useful for online
+                             defragmentation, because the allocator has the
+                             opportunity to allocate moved blocks better,
+                             ideally into one contiguous extent.
+
+ EXT4_IOC_GROUP_ADD          Add a new group descriptor to an existing or
+                             new group descriptor block. The new group
+                             descriptor is described by ext4_new_group_input
+                             structure, which is passed as an argument to
+                             this ioctl. This is especially useful in
+                             conjunction with EXT4_IOC_GROUP_EXTEND,
+                             which allows online resize of the filesystem
+                             to the end of the last existing block group.
+                             These two ioctls combined are used by the
+                             userspace online resize tool (e.g. resize2fs).
+
+ EXT4_IOC_MIGRATE            This ioctl operates on the filesystem itself.
+                             It converts (migrates) an ext3 indirect-block-
+                             mapped inode to an ext4 extent-mapped inode by
+                             walking through the indirect block mapping of
+                             the original inode and converting contiguous
+                             block ranges into ext4 extents of a temporary
+                             inode. Then, the inodes are swapped. This ioctl
+                             might help when migrating from an ext3 to an
+                             ext4 filesystem; however, the suggested approach
+                             is to create a fresh ext4 filesystem and copy
+                             the data from a backup. Note that the filesystem
+                             has to support extents for this ioctl to work.
+
+ EXT4_IOC_ALLOC_DA_BLKS      Force all of the delayed-allocated blocks to be
+                             allocated to preserve application-expected ext3
+                             behaviour. Note that this will also start
+                             triggering a write of the data blocks, but this
+                             behaviour may change in the future as it is
+                             not necessary and has been done this way only
+                             for the sake of simplicity.
+..............................................................................
+
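
Since EXT4_IOC_GETFLAGS and EXT4_IOC_SETFLAGS are aliases for the generic
FS_IOC_GETFLAGS and FS_IOC_SETFLAGS, a program can use the definitions from
<linux/fs.h> directly. A minimal sketch that reads the attribute flags of a
file (the path is a placeholder):

/* Sketch: query inode attribute flags on ext4 via the generic
 * FS_IOC_GETFLAGS ioctl.  The file path is a placeholder.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
        int flags;
        int fd = open("/mnt/scratch/testfile", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
                perror("ioctl(FS_IOC_GETFLAGS)");
                close(fd);
                return 1;
        }
        printf("inode flags: 0x%x%s\n", flags,
               (flags & FS_EXTENT_FL) ? " (extent mapped)" : "");
        close(fd);
        return 0;
}
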
 References
 ==========
 
index adf96b822781b25234ea4a3301bd6330aed01c92..97b970e7dd130abdcb86d4ece7c6526718e05cf0 100644 (file)
@@ -21,6 +21,8 @@
 #include "ext4_jbd2.h"
 #include "mballoc.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
@@ -342,6 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
         * We do it here so the bitmap uptodate bit
         * get set with buffer lock held.
         */
+       trace_ext4_read_block_bitmap_load(sb, block_group);
        set_bitmap_uptodate(bh);
        if (bh_submit_read(bh) < 0) {
                put_bh(bh);
index d8b992e658c154796efadb80bb8a605ee365409e..e25e99bf7ee13321afc8702bf91506a08557f148 100644 (file)
@@ -202,13 +202,6 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
        return 1;
 }
 
-static inline void ext4_journal_release_buffer(handle_t *handle,
-                                               struct buffer_head *bh)
-{
-       if (ext4_handle_valid(handle))
-               jbd2_journal_release_buffer(handle, bh);
-}
-
 static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
 {
        return ext4_journal_start_sb(inode->i_sb, nblocks);
index 7516fb9c0bd5ade918540dc4ad8c2d0b53aee249..dd2cb5076ff9d0831486fbc79757763da0c9bbc8 100644 (file)
@@ -44,6 +44,8 @@
 #include "ext4_jbd2.h"
 #include "ext4_extents.h"
 
+#include <trace/events/ext4.h>
+
 static int ext4_ext_truncate_extend_restart(handle_t *handle,
                                            struct inode *inode,
                                            int needed)
@@ -664,6 +666,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
                if (unlikely(!bh))
                        goto err;
                if (!bh_uptodate_or_lock(bh)) {
+                       trace_ext4_ext_load_extent(inode, block,
+                                               path[ppos].p_block);
                        if (bh_submit_read(bh) < 0) {
                                put_bh(bh);
                                goto err;
@@ -1034,7 +1038,7 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
-                       ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+                       ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
                                         EXT4_FREE_BLOCKS_METADATA);
                }
        }
@@ -2059,7 +2063,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
        if (err)
                return err;
        ext_debug("index is empty, remove it, free block %llu\n", leaf);
-       ext4_free_blocks(handle, inode, 0, leaf, 1,
+       ext4_free_blocks(handle, inode, NULL, leaf, 1,
                         EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return err;
 }
@@ -2156,7 +2160,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
                num = le32_to_cpu(ex->ee_block) + ee_len - from;
                start = ext4_ext_pblock(ex) + ee_len - num;
                ext_debug("free last %u blocks starting %llu\n", num, start);
-               ext4_free_blocks(handle, inode, 0, start, num, flags);
+               ext4_free_blocks(handle, inode, NULL, start, num, flags);
        } else if (from == le32_to_cpu(ex->ee_block)
                   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
                printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -3108,14 +3112,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 {
        int i, depth;
        struct ext4_extent_header *eh;
-       struct ext4_extent *ex, *last_ex;
+       struct ext4_extent *last_ex;
 
        if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
                return 0;
 
        depth = ext_depth(inode);
        eh = path[depth].p_hdr;
-       ex = path[depth].p_ext;
 
        if (unlikely(!eh->eh_entries)) {
                EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
@@ -3295,9 +3298,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        struct ext4_map_blocks *map, int flags)
 {
        struct ext4_ext_path *path = NULL;
-       struct ext4_extent_header *eh;
        struct ext4_extent newex, *ex;
-       ext4_fsblk_t newblock;
+       ext4_fsblk_t newblock = 0;
        int err = 0, depth, ret;
        unsigned int allocated = 0;
        struct ext4_allocation_request ar;
@@ -3305,6 +3307,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
        ext_debug("blocks %u/%u requested for inode %lu\n",
                  map->m_lblk, map->m_len, inode->i_ino);
+       trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
        /* check in cache */
        if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
@@ -3352,7 +3355,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                err = -EIO;
                goto out2;
        }
-       eh = path[depth].p_hdr;
 
        ex = path[depth].p_ext;
        if (ex) {
@@ -3485,7 +3487,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                /* not a good idea to call discard here directly,
                 * but otherwise we'd need to call it every free() */
                ext4_discard_preallocations(inode);
-               ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
+               ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
                                 ext4_ext_get_actual_len(&newex), 0);
                goto out2;
        }
@@ -3525,6 +3527,8 @@ out2:
                ext4_ext_drop_refs(path);
                kfree(path);
        }
+       trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
+               newblock, map->m_len, err ? err : allocated);
        return err ? err : allocated;
 }
 
@@ -3658,6 +3662,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
 
+       trace_ext4_fallocate_enter(inode, offset, len, mode);
        map.m_lblk = offset >> blkbits;
        /*
         * We can't just convert len to max_blocks because
@@ -3673,6 +3678,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        ret = inode_newsize_ok(inode, (len + offset));
        if (ret) {
                mutex_unlock(&inode->i_mutex);
+               trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
                return ret;
        }
 retry:
@@ -3717,6 +3723,8 @@ retry:
                goto retry;
        }
        mutex_unlock(&inode->i_mutex);
+       trace_ext4_fallocate_exit(inode, offset, max_blocks,
+                               ret > 0 ? ret2 : ret);
        return ret > 0 ? ret2 : ret;
 }
 
@@ -3775,6 +3783,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
        }
        return ret > 0 ? ret2 : ret;
 }
+
 /*
  * Callback function called for each extent to gather FIEMAP information.
  */
@@ -3782,38 +3791,162 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
                       struct ext4_ext_cache *newex, struct ext4_extent *ex,
                       void *data)
 {
-       struct fiemap_extent_info *fieinfo = data;
-       unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
        __u64   logical;
        __u64   physical;
        __u64   length;
+       loff_t  size;
        __u32   flags = 0;
-       int     error;
+       int             ret = 0;
+       struct fiemap_extent_info *fieinfo = data;
+       unsigned char blksize_bits;
 
-       logical =  (__u64)newex->ec_block << blksize_bits;
+       blksize_bits = inode->i_sb->s_blocksize_bits;
+       logical = (__u64)newex->ec_block << blksize_bits;
 
        if (newex->ec_start == 0) {
-               pgoff_t offset;
-               struct page *page;
+               /*
+                * No extent in extent-tree contains block @newex->ec_start,
+                * then the block may stay in 1)a hole or 2)delayed-extent.
+                *
+                * Holes or delayed-extents are processed as follows.
+                * 1. lookup dirty pages with specified range in pagecache.
+                *    If no page is got, then there is no delayed-extent and
+                *    return with EXT_CONTINUE.
+                * 2. find the 1st mapped buffer,
+                * 3. check if the mapped buffer is both in the request range
+                *    and a delayed buffer. If not, there is no delayed-extent,
+                *    then return.
+                * 4. a delayed-extent is found, the extent will be collected.
+                */
+               ext4_lblk_t     end = 0;
+               pgoff_t         last_offset;
+               pgoff_t         offset;
+               pgoff_t         index;
+               struct page     **pages = NULL;
                struct buffer_head *bh = NULL;
+               struct buffer_head *head = NULL;
+               unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
+
+               pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
+               if (pages == NULL)
+                       return -ENOMEM;
 
                offset = logical >> PAGE_SHIFT;
-               page = find_get_page(inode->i_mapping, offset);
-               if (!page || !page_has_buffers(page))
-                       return EXT_CONTINUE;
+repeat:
+               last_offset = offset;
+               head = NULL;
+               ret = find_get_pages_tag(inode->i_mapping, &offset,
+                                       PAGECACHE_TAG_DIRTY, nr_pages, pages);
+
+               if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+                       /* First time, try to find a mapped buffer. */
+                       if (ret == 0) {
+out:
+                               for (index = 0; index < ret; index++)
+                                       page_cache_release(pages[index]);
+                               /* just a hole. */
+                               kfree(pages);
+                               return EXT_CONTINUE;
+                       }
 
-               bh = page_buffers(page);
+                       /* Try to find the 1st mapped buffer. */
+                       end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
+                                 blksize_bits;
+                       if (!page_has_buffers(pages[0]))
+                               goto out;
+                       head = page_buffers(pages[0]);
+                       if (!head)
+                               goto out;
 
-               if (!bh)
-                       return EXT_CONTINUE;
+                       bh = head;
+                       do {
+                               if (buffer_mapped(bh)) {
+                                       /* get the 1st mapped buffer. */
+                                       if (end > newex->ec_block +
+                                               newex->ec_len)
+                                               /* The buffer is out of
+                                                * the request range.
+                                                */
+                                               goto out;
+                                       goto found_mapped_buffer;
+                               }
+                               bh = bh->b_this_page;
+                               end++;
+                       } while (bh != head);
 
-               if (buffer_delay(bh)) {
-                       flags |= FIEMAP_EXTENT_DELALLOC;
-                       page_cache_release(page);
+                       /* No mapped buffer found. */
+                       goto out;
                } else {
-                       page_cache_release(page);
-                       return EXT_CONTINUE;
+                       /*Find contiguous delayed buffers. */
+                       if (ret > 0 && pages[0]->index == last_offset)
+                               head = page_buffers(pages[0]);
+                       bh = head;
                }
+
+found_mapped_buffer:
+               if (bh != NULL && buffer_delay(bh)) {
+                       /* 1st or contiguous delayed buffer found. */
+                       if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+                               /*
+                                * 1st delayed buffer found, record
+                                * the start of extent.
+                                */
+                               flags |= FIEMAP_EXTENT_DELALLOC;
+                               newex->ec_block = end;
+                               logical = (__u64)end << blksize_bits;
+                       }
+                       /* Find contiguous delayed buffers. */
+                       do {
+                               if (!buffer_delay(bh))
+                                       goto found_delayed_extent;
+                               bh = bh->b_this_page;
+                               end++;
+                       } while (bh != head);
+
+                       for (index = 1; index < ret; index++) {
+                               if (!page_has_buffers(pages[index])) {
+                                       bh = NULL;
+                                       break;
+                               }
+                               head = page_buffers(pages[index]);
+                               if (!head) {
+                                       bh = NULL;
+                                       break;
+                               }
+                               if (pages[index]->index !=
+                                       pages[0]->index + index) {
+                                       /* Blocks are not contiguous. */
+                                       bh = NULL;
+                                       break;
+                               }
+                               bh = head;
+                               do {
+                                       if (!buffer_delay(bh))
+                                               /* Delayed-extent ends. */
+                                               goto found_delayed_extent;
+                                       bh = bh->b_this_page;
+                                       end++;
+                               } while (bh != head);
+                       }
+               } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
+                       /* a hole found. */
+                       goto out;
+
+found_delayed_extent:
+               newex->ec_len = min(end - newex->ec_block,
+                                               (ext4_lblk_t)EXT_INIT_MAX_LEN);
+               if (ret == nr_pages && bh != NULL &&
+                       newex->ec_len < EXT_INIT_MAX_LEN &&
+                       buffer_delay(bh)) {
+                       /* Have not collected an extent and continue. */
+                       for (index = 0; index < ret; index++)
+                               page_cache_release(pages[index]);
+                       goto repeat;
+               }
+
+               for (index = 0; index < ret; index++)
+                       page_cache_release(pages[index]);
+               kfree(pages);
        }
 
        physical = (__u64)newex->ec_start << blksize_bits;
@@ -3822,32 +3955,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
        if (ex && ext4_ext_is_uninitialized(ex))
                flags |= FIEMAP_EXTENT_UNWRITTEN;
 
-       /*
-        * If this extent reaches EXT_MAX_BLOCK, it must be last.
-        *
-        * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
-        * this also indicates no more allocated blocks.
-        *
-        * XXX this might miss a single-block extent at EXT_MAX_BLOCK
-        */
-       if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
-           newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
-               loff_t size = i_size_read(inode);
-               loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
-
+       size = i_size_read(inode);
+       if (logical + length >= size)
                flags |= FIEMAP_EXTENT_LAST;
-               if ((flags & FIEMAP_EXTENT_DELALLOC) &&
-                   logical+length > size)
-                       length = (size - logical + bs - 1) & ~(bs-1);
-       }
 
-       error = fiemap_fill_next_extent(fieinfo, logical, physical,
+       ret = fiemap_fill_next_extent(fieinfo, logical, physical,
                                        length, flags);
-       if (error < 0)
-               return error;
-       if (error == 1)
+       if (ret < 0)
+               return ret;
+       if (ret == 1)
                return EXT_BREAK;
-
        return EXT_CONTINUE;
 }
 
index 7829b287822a4207080c4089921e22418a855d93..7f74019d6d7766e0657d7d8620c128c7006af5b7 100644 (file)
@@ -164,20 +164,20 @@ int ext4_sync_file(struct file *file, int datasync)
 
        J_ASSERT(ext4_journal_current_handle() == NULL);
 
-       trace_ext4_sync_file(file, datasync);
+       trace_ext4_sync_file_enter(file, datasync);
 
        if (inode->i_sb->s_flags & MS_RDONLY)
                return 0;
 
        ret = ext4_flush_completed_IO(inode);
        if (ret < 0)
-               return ret;
+               goto out;
 
        if (!journal) {
                ret = generic_file_fsync(file, datasync);
                if (!ret && !list_empty(&inode->i_dentry))
                        ext4_sync_parent(inode);
-               return ret;
+               goto out;
        }
 
        /*
@@ -194,8 +194,10 @@ int ext4_sync_file(struct file *file, int datasync)
         *  (they were dirtied by commit).  But that's OK - the blocks are
         *  safe in-journal, which is all fsync() needs to ensure.
         */
-       if (ext4_should_journal_data(inode))
-               return ext4_force_commit(inode->i_sb);
+       if (ext4_should_journal_data(inode)) {
+               ret = ext4_force_commit(inode->i_sb);
+               goto out;
+       }
 
        commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
        if (jbd2_log_start_commit(journal, commit_tid)) {
@@ -215,5 +217,7 @@ int ext4_sync_file(struct file *file, int datasync)
                ret = jbd2_log_wait_commit(journal, commit_tid);
        } else if (journal->j_flags & JBD2_BARRIER)
                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ out:
+       trace_ext4_sync_file_exit(inode, ret);
        return ret;
 }
index 78b79e1bd7ed2214af4399bd628fd4158d36410e..21bb2f61e50223c2da0946c4b48db0e4c947e1a7 100644 (file)
@@ -152,6 +152,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
         * We do it here so the bitmap uptodate bit
         * get set with buffer lock held.
         */
+       trace_ext4_load_inode_bitmap(sb, block_group);
        set_bitmap_uptodate(bh);
        if (bh_submit_read(bh) < 0) {
                put_bh(bh);
@@ -649,7 +650,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
                *group = parent_group + flex_size;
                if (*group > ngroups)
                        *group = 0;
-               return find_group_orlov(sb, parent, group, mode, 0);
+               return find_group_orlov(sb, parent, group, mode, NULL);
        }
 
        /*
@@ -1054,6 +1055,11 @@ got:
                }
        }
 
+       if (ext4_handle_valid(handle)) {
+               ei->i_sync_tid = handle->h_transaction->t_tid;
+               ei->i_datasync_tid = handle->h_transaction->t_tid;
+       }
+
        err = ext4_mark_inode_dirty(handle, inode);
        if (err) {
                ext4_std_error(sb, err);
index 9297ad46c4658ee3d7e05198754dc14789db8c2e..1a86282b90244c43fe75ae106d32c05b027b21d4 100644 (file)
@@ -173,7 +173,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
        BUG_ON(EXT4_JOURNAL(inode) == NULL);
        jbd_debug(2, "restarting handle %p\n", handle);
        up_write(&EXT4_I(inode)->i_data_sem);
-       ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
+       ret = ext4_journal_restart(handle, nblocks);
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_discard_preallocations(inode);
 
@@ -720,7 +720,7 @@ allocated:
        return ret;
 failed_out:
        for (i = 0; i < index; i++)
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
        return ret;
 }
 
@@ -823,20 +823,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
        return err;
 failed:
        /* Allocation failed, free what we already allocated */
-       ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
+       ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
        for (i = 1; i <= n ; i++) {
                /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
                 */
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
                                 EXT4_FREE_BLOCKS_FORGET);
        }
        for (i = n+1; i < indirect_blks; i++)
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
 
-       ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
+       ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
 
        return err;
 }
@@ -924,7 +924,7 @@ err_out:
                ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
                                 EXT4_FREE_BLOCKS_FORGET);
        }
-       ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+       ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
                         blks, 0);
 
        return err;
@@ -973,6 +973,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
        int count = 0;
        ext4_fsblk_t first_block = 0;
 
+       trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
        J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
        J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
        depth = ext4_block_to_path(inode, map->m_lblk, offsets,
@@ -1058,6 +1059,8 @@ cleanup:
                partial--;
        }
 out:
+       trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
+                               map->m_pblk, map->m_len, err);
        return err;
 }
 
@@ -2060,7 +2063,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                if (nr_pages == 0)
                        break;
                for (i = 0; i < nr_pages; i++) {
-                       int commit_write = 0, redirty_page = 0;
+                       int commit_write = 0, skip_page = 0;
                        struct page *page = pvec.pages[i];
 
                        index = page->index;
@@ -2086,14 +2089,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                         * If the page does not have buffers (for
                         * whatever reason), try to create them using
                         * __block_write_begin.  If this fails,
-                        * redirty the page and move on.
+                        * skip the page and move on.
                         */
                        if (!page_has_buffers(page)) {
                                if (__block_write_begin(page, 0, len,
                                                noalloc_get_block_write)) {
-                               redirty_page:
-                                       redirty_page_for_writepage(mpd->wbc,
-                                                                  page);
+                               skip_page:
                                        unlock_page(page);
                                        continue;
                                }
@@ -2104,7 +2105,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                        block_start = 0;
                        do {
                                if (!bh)
-                                       goto redirty_page;
+                                       goto skip_page;
                                if (map && (cur_logical >= map->m_lblk) &&
                                    (cur_logical <= (map->m_lblk +
                                                     (map->m_len - 1)))) {
@@ -2120,22 +2121,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                                        clear_buffer_unwritten(bh);
                                }
 
-                               /* redirty page if block allocation undone */
+                               /* skip page if block allocation undone */
                                if (buffer_delay(bh) || buffer_unwritten(bh))
-                                       redirty_page = 1;
+                                       skip_page = 1;
                                bh = bh->b_this_page;
                                block_start += bh->b_size;
                                cur_logical++;
                                pblock++;
                        } while (bh != page_bufs);
 
-                       if (redirty_page)
-                               goto redirty_page;
+                       if (skip_page)
+                               goto skip_page;
 
                        if (commit_write)
                                /* mark the buffer_heads as dirty & uptodate */
                                block_commit_write(page, 0, len);
 
+                       clear_page_dirty_for_io(page);
                        /*
                         * Delalloc doesn't support data journalling,
                         * but eventually maybe we'll lift this
@@ -2165,8 +2167,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
        return ret;
 }
 
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
-                                       sector_t logical, long blk_cnt)
+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
 {
        int nr_pages, i;
        pgoff_t index, end;
@@ -2174,9 +2175,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
        struct inode *inode = mpd->inode;
        struct address_space *mapping = inode->i_mapping;
 
-       index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       end   = (logical + blk_cnt - 1) >>
-                               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       index = mpd->first_page;
+       end   = mpd->next_page - 1;
        while (index <= end) {
                nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
                if (nr_pages == 0)
@@ -2279,9 +2279,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
                err = blks;
                /*
                 * If get block returns EAGAIN or ENOSPC and there
-                * appears to be free blocks we will call
-                * ext4_writepage() for all of the pages which will
-                * just redirty the pages.
+                * appears to be free blocks we will just let
+                * mpage_da_submit_io() unlock all of the pages.
                 */
                if (err == -EAGAIN)
                        goto submit_io;
@@ -2312,8 +2311,10 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
                                ext4_print_free_blocks(mpd->inode);
                }
                /* invalidate all the pages */
-               ext4_da_block_invalidatepages(mpd, next,
-                               mpd->b_size >> mpd->inode->i_blkbits);
+               ext4_da_block_invalidatepages(mpd);
+
+               /* Mark this page range as having been completed */
+               mpd->io_done = 1;
                return;
        }
        BUG_ON(blks == 0);
@@ -2437,102 +2438,6 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
        return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
 }
 
-/*
- * __mpage_da_writepage - finds extent of pages and blocks
- *
- * @page: page to consider
- * @wbc: not used, we just follow rules
- * @data: context
- *
- * The function finds extents of pages and scan them for all blocks.
- */
-static int __mpage_da_writepage(struct page *page,
-                               struct writeback_control *wbc,
-                               struct mpage_da_data *mpd)
-{
-       struct inode *inode = mpd->inode;
-       struct buffer_head *bh, *head;
-       sector_t logical;
-
-       /*
-        * Can we merge this page to current extent?
-        */
-       if (mpd->next_page != page->index) {
-               /*
-                * Nope, we can't. So, we map non-allocated blocks
-                * and start IO on them
-                */
-               if (mpd->next_page != mpd->first_page) {
-                       mpage_da_map_and_submit(mpd);
-                       /*
-                        * skip rest of the page in the page_vec
-                        */
-                       redirty_page_for_writepage(wbc, page);
-                       unlock_page(page);
-                       return MPAGE_DA_EXTENT_TAIL;
-               }
-
-               /*
-                * Start next extent of pages ...
-                */
-               mpd->first_page = page->index;
-
-               /*
-                * ... and blocks
-                */
-               mpd->b_size = 0;
-               mpd->b_state = 0;
-               mpd->b_blocknr = 0;
-       }
-
-       mpd->next_page = page->index + 1;
-       logical = (sector_t) page->index <<
-                 (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-       if (!page_has_buffers(page)) {
-               mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
-                                      (1 << BH_Dirty) | (1 << BH_Uptodate));
-               if (mpd->io_done)
-                       return MPAGE_DA_EXTENT_TAIL;
-       } else {
-               /*
-                * Page with regular buffer heads, just add all dirty ones
-                */
-               head = page_buffers(page);
-               bh = head;
-               do {
-                       BUG_ON(buffer_locked(bh));
-                       /*
-                        * We need to try to allocate
-                        * unmapped blocks in the same page.
-                        * Otherwise we won't make progress
-                        * with the page in ext4_writepage
-                        */
-                       if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-                               mpage_add_bh_to_extent(mpd, logical,
-                                                      bh->b_size,
-                                                      bh->b_state);
-                               if (mpd->io_done)
-                                       return MPAGE_DA_EXTENT_TAIL;
-                       } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
-                               /*
-                                * mapped dirty buffer. We need to update
-                                * the b_state because we look at
-                                * b_state in mpage_da_map_blocks. We don't
-                                * update b_size because if we find an
-                                * unmapped buffer_head later we need to
-                                * use the b_state flag of that buffer_head.
-                                */
-                               if (mpd->b_size == 0)
-                                       mpd->b_state = bh->b_state & BH_FLAGS;
-                       }
-                       logical++;
-               } while ((bh = bh->b_this_page) != head);
-       }
-
-       return 0;
-}
-
 /*
  * This is a special get_blocks_t callback which is used by
  * ext4_da_write_begin().  It will either return mapped block or
@@ -2597,7 +2502,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                 * for partial write.
                 */
                set_buffer_new(bh);
-               set_buffer_mapped(bh);
        }
        return 0;
 }
@@ -2811,27 +2715,27 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 
 /*
  * write_cache_pages_da - walk the list of dirty pages of the given
- * address space and call the callback function (which usually writes
- * the pages).
- *
- * This is a forked version of write_cache_pages().  Differences:
- *     Range cyclic is ignored.
- *     no_nrwrite_index_update is always presumed true
+ * address space and accumulate pages that need writing, and call
+ * mpage_da_map_and_submit to map a single contiguous memory region
+ * and then write them.
  */
 static int write_cache_pages_da(struct address_space *mapping,
                                struct writeback_control *wbc,
                                struct mpage_da_data *mpd,
                                pgoff_t *done_index)
 {
-       int ret = 0;
-       int done = 0;
-       struct pagevec pvec;
-       unsigned nr_pages;
-       pgoff_t index;
-       pgoff_t end;            /* Inclusive */
-       long nr_to_write = wbc->nr_to_write;
-       int tag;
-
+       struct buffer_head      *bh, *head;
+       struct inode            *inode = mapping->host;
+       struct pagevec          pvec;
+       unsigned int            nr_pages;
+       sector_t                logical;
+       pgoff_t                 index, end;
+       long                    nr_to_write = wbc->nr_to_write;
+       int                     i, tag, ret = 0;
+
+       memset(mpd, 0, sizeof(struct mpage_da_data));
+       mpd->wbc = wbc;
+       mpd->inode = inode;
        pagevec_init(&pvec, 0);
        index = wbc->range_start >> PAGE_CACHE_SHIFT;
        end = wbc->range_end >> PAGE_CACHE_SHIFT;
@@ -2842,13 +2746,11 @@ static int write_cache_pages_da(struct address_space *mapping,
                tag = PAGECACHE_TAG_DIRTY;
 
        *done_index = index;
-       while (!done && (index <= end)) {
-               int i;
-
+       while (index <= end) {
                nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
                              min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
                if (nr_pages == 0)
-                       break;
+                       return 0;
 
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
@@ -2860,60 +2762,100 @@ static int write_cache_pages_da(struct address_space *mapping,
                         * mapping. However, page->index will not change
                         * because we have a reference on the page.
                         */
-                       if (page->index > end) {
-                               done = 1;
-                               break;
-                       }
+                       if (page->index > end)
+                               goto out;
 
                        *done_index = page->index + 1;
 
+                       /*
+                        * If we can't merge this page, and we have
+                        * accumulated a contiguous region, write it
+                        */
+                       if ((mpd->next_page != page->index) &&
+                           (mpd->next_page != mpd->first_page)) {
+                               mpage_da_map_and_submit(mpd);
+                               goto ret_extent_tail;
+                       }
+
                        lock_page(page);
 
                        /*
-                        * Page truncated or invalidated. We can freely skip it
-                        * then, even for data integrity operations: the page
-                        * has disappeared concurrently, so there could be no
-                        * real expectation of this data interity operation
-                        * even if there is now a new, dirty page at the same
-                        * pagecache address.
+                        * If the page is no longer dirty, or its
+                        * mapping no longer corresponds to inode we
+                        * are writing (which means it has been
+                        * truncated or invalidated), or the page is
+                        * already under writeback and we are not
+                        * doing a data integrity writeback, skip the page
                         */
-                       if (unlikely(page->mapping != mapping)) {
-continue_unlock:
+                       if (!PageDirty(page) ||
+                           (PageWriteback(page) &&
+                            (wbc->sync_mode == WB_SYNC_NONE)) ||
+                           unlikely(page->mapping != mapping)) {
                                unlock_page(page);
                                continue;
                        }
 
-                       if (!PageDirty(page)) {
-                               /* someone wrote it for us */
-                               goto continue_unlock;
-                       }
-
-                       if (PageWriteback(page)) {
-                               if (wbc->sync_mode != WB_SYNC_NONE)
-                                       wait_on_page_writeback(page);
-                               else
-                                       goto continue_unlock;
-                       }
+                       if (PageWriteback(page))
+                               wait_on_page_writeback(page);
 
                        BUG_ON(PageWriteback(page));
-                       if (!clear_page_dirty_for_io(page))
-                               goto continue_unlock;
 
-                       ret = __mpage_da_writepage(page, wbc, mpd);
-                       if (unlikely(ret)) {
-                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
-                                       unlock_page(page);
-                                       ret = 0;
-                               } else {
-                                       done = 1;
-                                       break;
-                               }
+                       if (mpd->next_page != page->index)
+                               mpd->first_page = page->index;
+                       mpd->next_page = page->index + 1;
+                       logical = (sector_t) page->index <<
+                               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+                       if (!page_has_buffers(page)) {
+                               mpage_add_bh_to_extent(mpd, logical,
+                                                      PAGE_CACHE_SIZE,
+                                                      (1 << BH_Dirty) | (1 << BH_Uptodate));
+                               if (mpd->io_done)
+                                       goto ret_extent_tail;
+                       } else {
+                               /*
+                                * Page with regular buffer heads,
+                                * just add all dirty ones
+                                */
+                               head = page_buffers(page);
+                               bh = head;
+                               do {
+                                       BUG_ON(buffer_locked(bh));
+                                       /*
+                                        * We need to try to allocate
+                                        * unmapped blocks in the same page.
+                                        * Otherwise we won't make progress
+                                        * with the page in ext4_writepage
+                                        */
+                                       if (ext4_bh_delay_or_unwritten(NULL, bh)) {
+                                               mpage_add_bh_to_extent(mpd, logical,
+                                                                      bh->b_size,
+                                                                      bh->b_state);
+                                               if (mpd->io_done)
+                                                       goto ret_extent_tail;
+                                       } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
+                                               /*
+                                                * mapped dirty buffer. We need
+                                                * to update the b_state
+                                                * because we look at b_state
+                                                * in mpage_da_map_blocks.  We
+                                                * don't update b_size because
+                                                * if we find an unmapped
+                                                * buffer_head later we need to
+                                                * use the b_state flag of that
+                                                * buffer_head.
+                                                */
+                                               if (mpd->b_size == 0)
+                                                       mpd->b_state = bh->b_state & BH_FLAGS;
+                                       }
+                                       logical++;
+                               } while ((bh = bh->b_this_page) != head);
                        }
 
                        if (nr_to_write > 0) {
                                nr_to_write--;
                                if (nr_to_write == 0 &&
-                                   wbc->sync_mode == WB_SYNC_NONE) {
+                                   wbc->sync_mode == WB_SYNC_NONE)
                                        /*
                                         * We stop writing back only if we are
                                         * not doing integrity sync. In case of
@@ -2924,14 +2866,18 @@ continue_unlock:
                                         * pages, but have not synced all of the
                                         * old dirty pages.
                                         */
-                                       done = 1;
-                                       break;
-                               }
+                                       goto out;
                        }
                }
                pagevec_release(&pvec);
                cond_resched();
        }
+       return 0;
+ret_extent_tail:
+       ret = MPAGE_DA_EXTENT_TAIL;
+out:
+       pagevec_release(&pvec);
+       cond_resched();
        return ret;
 }
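For reference on the extent bookkeeping above: the logical block number is simply the page index shifted by the page-to-block ratio. A standalone arithmetic check (the 4 KB page size and 1 KB block size below are assumed values, not taken from a real filesystem):

#include <stdio.h>

int main(void)
{
	/* Assumed geometry: 4 KB page cache pages, 1 KB filesystem blocks. */
	const unsigned int page_cache_shift = 12;	/* log2(4096) */
	const unsigned int blkbits = 10;		/* log2(1024) */
	unsigned long page_index = 10;

	unsigned long long logical =
		(unsigned long long)page_index << (page_cache_shift - blkbits);
	printf("page %lu -> logical block %llu\n", page_index, logical);
	return 0;
}

With those constants each page covers four blocks, so page index 10 starts at logical block 40.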
 
@@ -2945,7 +2891,6 @@ static int ext4_da_writepages(struct address_space *mapping,
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
        int pages_written = 0;
-       long pages_skipped;
        unsigned int max_pages;
        int range_cyclic, cycled = 1, io_done = 0;
        int needed_blocks, ret = 0;
@@ -3028,11 +2973,6 @@ static int ext4_da_writepages(struct address_space *mapping,
                wbc->nr_to_write = desired_nr_to_write;
        }
 
-       mpd.wbc = wbc;
-       mpd.inode = mapping->host;
-
-       pages_skipped = wbc->pages_skipped;
-
 retry:
        if (wbc->sync_mode == WB_SYNC_ALL)
                tag_pages_for_writeback(mapping, index, end);
@@ -3059,22 +2999,10 @@ retry:
                }
 
                /*
-                * Now call __mpage_da_writepage to find the next
+                * Now call write_cache_pages_da() to find the next
                 * contiguous region of logical blocks that need
-                * blocks to be allocated by ext4.  We don't actually
-                * submit the blocks for I/O here, even though
-                * write_cache_pages thinks it will, and will set the
-                * pages as clean for write before calling
-                * __mpage_da_writepage().
+                * blocks to be allocated by ext4 and submit them.
                 */
-               mpd.b_size = 0;
-               mpd.b_state = 0;
-               mpd.b_blocknr = 0;
-               mpd.first_page = 0;
-               mpd.next_page = 0;
-               mpd.io_done = 0;
-               mpd.pages_written = 0;
-               mpd.retval = 0;
                ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
                /*
                 * If we have a contiguous extent of pages and we
@@ -3096,7 +3024,6 @@ retry:
                         * and try again
                         */
                        jbd2_journal_force_commit_nested(sbi->s_journal);
-                       wbc->pages_skipped = pages_skipped;
                        ret = 0;
                } else if (ret == MPAGE_DA_EXTENT_TAIL) {
                        /*
@@ -3104,7 +3031,6 @@ retry:
                         * rest of the pages
                         */
                        pages_written += mpd.pages_written;
-                       wbc->pages_skipped = pages_skipped;
                        ret = 0;
                        io_done = 1;
                } else if (wbc->nr_to_write)
@@ -3122,11 +3048,6 @@ retry:
                wbc->range_end  = mapping->writeback_index - 1;
                goto retry;
        }
-       if (pages_skipped != wbc->pages_skipped)
-               ext4_msg(inode->i_sb, KERN_CRIT,
-                        "This should not happen leaving %s "
-                        "with nr_to_write = %ld ret = %d",
-                        __func__, wbc->nr_to_write, ret);
 
        /* Update index */
        wbc->range_cyclic = range_cyclic;
@@ -3460,6 +3381,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
 
 static int ext4_readpage(struct file *file, struct page *page)
 {
+       trace_ext4_readpage(page);
        return mpage_readpage(page, ext4_get_block);
 }
 
@@ -3494,6 +3416,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
 {
        journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
+       trace_ext4_invalidatepage(page, offset);
+
        /*
         * free any io_end structure allocated for buffers to be discarded
         */
@@ -3515,6 +3439,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 {
        journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
+       trace_ext4_releasepage(page);
+
        WARN_ON(PageChecked(page));
        if (!page_has_buffers(page))
                return 0;
@@ -3873,11 +3799,16 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       ssize_t ret;
 
+       trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
-
-       return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+               ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+       else
+               ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+       trace_ext4_direct_IO_exit(inode, offset,
+                               iov_length(iov, nr_segs), rw, ret);
+       return ret;
 }
 
 /*
@@ -4173,6 +4104,9 @@ no_top:
  *
  * We release `count' blocks on disk, but (last - first) may be greater
  * than `count' because there can be holes in there.
+ *
+ * Return 0 on success, 1 on invalid block range
+ * and < 0 on fatal error.
  */
 static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
                             struct buffer_head *bh,
@@ -4199,33 +4133,32 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
                if (bh) {
                        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
                        err = ext4_handle_dirty_metadata(handle, inode, bh);
-                       if (unlikely(err)) {
-                               ext4_std_error(inode->i_sb, err);
-                               return 1;
-                       }
+                       if (unlikely(err))
+                               goto out_err;
                }
                err = ext4_mark_inode_dirty(handle, inode);
-               if (unlikely(err)) {
-                       ext4_std_error(inode->i_sb, err);
-                       return 1;
-               }
+               if (unlikely(err))
+                       goto out_err;
                err = ext4_truncate_restart_trans(handle, inode,
                                                  blocks_for_truncate(inode));
-               if (unlikely(err)) {
-                       ext4_std_error(inode->i_sb, err);
-                       return 1;
-               }
+               if (unlikely(err))
+                       goto out_err;
                if (bh) {
                        BUFFER_TRACE(bh, "retaking write access");
-                       ext4_journal_get_write_access(handle, bh);
+                       err = ext4_journal_get_write_access(handle, bh);
+                       if (unlikely(err))
+                               goto out_err;
                }
        }
 
        for (p = first; p < last; p++)
                *p = 0;
 
-       ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
+       ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
        return 0;
+out_err:
+       ext4_std_error(inode->i_sb, err);
+       return err;
 }
 
 /**
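The ext4_clear_blocks() hunk above collapses three copies of "call ext4_std_error() and return" into one out_err exit path and now propagates the raw error code. A minimal standalone sketch of that single-exit pattern (step() and do_three_steps() are purely illustrative names, not kernel functions):

#include <stdio.h>

static int step(int fail)
{
	return fail ? -5 : 0;	/* pretend -5 is an errno-style failure */
}

/* Every failing step jumps to the one label that reports and returns err. */
static int do_three_steps(int fail_at)
{
	int err;

	err = step(fail_at == 1);
	if (err)
		goto out_err;
	err = step(fail_at == 2);
	if (err)
		goto out_err;
	err = step(fail_at == 3);
	if (err)
		goto out_err;
	return 0;
out_err:
	fprintf(stderr, "step %d failed: %d\n", fail_at, err);
	return err;
}

int main(void)
{
	do_three_steps(0);	/* all steps succeed */
	do_three_steps(2);	/* second step fails, single exit path runs */
	return 0;
}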
@@ -4259,7 +4192,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
        ext4_fsblk_t nr;                    /* Current block # */
        __le32 *p;                          /* Pointer into inode/ind
                                               for current block */
-       int err;
+       int err = 0;
 
        if (this_bh) {                          /* For indirect block */
                BUFFER_TRACE(this_bh, "get_write_access");
@@ -4281,9 +4214,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
                        } else if (nr == block_to_free + count) {
                                count++;
                        } else {
-                               if (ext4_clear_blocks(handle, inode, this_bh,
-                                                     block_to_free, count,
-                                                     block_to_free_p, p))
+                               err = ext4_clear_blocks(handle, inode, this_bh,
+                                                       block_to_free, count,
+                                                       block_to_free_p, p);
+                               if (err)
                                        break;
                                block_to_free = nr;
                                block_to_free_p = p;
@@ -4292,9 +4226,12 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
                }
        }
 
-       if (count > 0)
-               ext4_clear_blocks(handle, inode, this_bh, block_to_free,
-                                 count, block_to_free_p, p);
+       if (!err && count > 0)
+               err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
+                                       count, block_to_free_p, p);
+       if (err < 0)
+               /* fatal error */
+               return;
 
        if (this_bh) {
                BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
@@ -4412,7 +4349,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                         * transaction where the data blocks are
                         * actually freed.
                         */
-                       ext4_free_blocks(handle, inode, 0, nr, 1,
+                       ext4_free_blocks(handle, inode, NULL, nr, 1,
                                         EXT4_FREE_BLOCKS_METADATA|
                                         EXT4_FREE_BLOCKS_FORGET);
 
@@ -4496,6 +4433,8 @@ void ext4_truncate(struct inode *inode)
        ext4_lblk_t last_block;
        unsigned blocksize = inode->i_sb->s_blocksize;
 
+       trace_ext4_truncate_enter(inode);
+
        if (!ext4_can_truncate(inode))
                return;
 
@@ -4506,6 +4445,7 @@ void ext4_truncate(struct inode *inode)
 
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ext4_ext_truncate(inode);
+               trace_ext4_truncate_exit(inode);
                return;
        }
 
@@ -4635,6 +4575,7 @@ out_stop:
                ext4_orphan_del(handle, inode);
 
        ext4_journal_stop(handle);
+       trace_ext4_truncate_exit(inode);
 }
 
 /*
@@ -4766,6 +4707,7 @@ make_io:
                 * has in-inode xattrs, or we don't have this inode in memory.
                 * Read the block from disk.
                 */
+               trace_ext4_load_inode(inode);
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
                submit_bh(READ_META, bh);
@@ -4871,7 +4813,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                return inode;
 
        ei = EXT4_I(inode);
-       iloc.bh = 0;
+       iloc.bh = NULL;
 
        ret = __ext4_get_inode_loc(inode, &iloc, 0);
        if (ret < 0)
index a84faa110bcda3b0cfdb4992c83cc422766e74be..808c554e773fdc2658c4708f1697edabab665acc 100644 (file)
@@ -334,16 +334,22 @@ mext_out:
        case FITRIM:
        {
                struct super_block *sb = inode->i_sb;
+               struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                int ret = 0;
 
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
 
+               if (!blk_queue_discard(q))
+                       return -EOPNOTSUPP;
+
                if (copy_from_user(&range, (struct fstrim_range *)arg,
                    sizeof(range)))
                        return -EFAULT;
 
+               range.minlen = max((unsigned int)range.minlen,
+                                  q->limits.discard_granularity);
                ret = ext4_trim_fs(sb, &range);
                if (ret < 0)
                        return ret;
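With the checks added above, FITRIM is refused when the underlying queue has no discard support, and the requested minimum extent is clamped up to the device's discard granularity. For context, a userspace caller would issue the ioctl roughly like this (a minimal sketch: the mount point and minlen are made up, and all fstrim_range fields are byte counts):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

int main(void)
{
	struct fstrim_range range;
	int fd = open("/mnt/ext4", O_RDONLY);	/* assumed mount point */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&range, 0, sizeof(range));
	range.start = 0;
	range.len = ~0ULL;		/* trim the whole filesystem */
	range.minlen = 4096;		/* may be raised to the discard granularity */
	if (ioctl(fd, FITRIM, &range) < 0)
		perror("FITRIM");
	else
		printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	close(fd);
	return 0;
}

On success the kernel reports back in range.len how many bytes were actually discarded.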
@@ -421,6 +427,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                return err;
        }
        case EXT4_IOC_MOVE_EXT:
+       case FITRIM:
                break;
        default:
                return -ENOIOCTLCMD;
index d1fe09aea73dc92419b0bf574694b6add33d7d8e..a5837a837a8bfea8e835563054c99b6fde0897e4 100644 (file)
@@ -432,9 +432,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
        }
 
        /* at order 0 we see each particular block */
-       *max = 1 << (e4b->bd_blkbits + 3);
-       if (order == 0)
+       if (order == 0) {
+               *max = 1 << (e4b->bd_blkbits + 3);
                return EXT4_MB_BITMAP(e4b);
+       }
 
        bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
        *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
@@ -616,7 +617,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
        MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
 
        grp = ext4_get_group_info(sb, e4b->bd_group);
-       buddy = mb_find_buddy(e4b, 0, &max);
        list_for_each(cur, &grp->bb_prealloc_list) {
                ext4_group_t groupnr;
                struct ext4_prealloc_space *pa;
@@ -635,7 +635,12 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
 #define mb_check_buddy(e4b)
 #endif
 
-/* FIXME!! need more doc */
+/*
+ * Divide the blocks starting at @first with length @len into
+ * smaller chunks whose lengths are powers of 2 (in blocks).
+ * Clear the bits in the bitmap which the blocks of the chunk(s) cover,
+ * then increase bb_counters[] for the corresponding chunk size.
+ */
 static void ext4_mb_mark_free_simple(struct super_block *sb,
                                void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
                                        struct ext4_group_info *grp)
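The comment added above describes the buddy-style decomposition. A standalone sketch of how a free range splits into power-of-two, alignment-respecting chunks (names and printed output are illustrative; the real ext4_mb_mark_free_simple() additionally clears bitmap bits and bumps bb_counters[]):

#include <stdio.h>
#include <strings.h>	/* ffs() */

static void mark_free_simple_demo(unsigned int first, unsigned int len,
				  unsigned int border)
{
	while (len > 0) {
		/* largest chunk that keeps 'first' aligned (capped by border) */
		int max = ffs(first | border) - 1;
		/* largest power of two not exceeding the remaining length */
		int min = 31 - __builtin_clz(len);
		int order = max < min ? max : min;
		unsigned int chunk = 1u << order;

		printf("chunk: start=%u order=%d blocks=%u\n", first, order, chunk);
		first += chunk;
		len -= chunk;
	}
}

int main(void)
{
	/* border mirrors the 2 << blocksize_bits cap used by the kernel code */
	mark_free_simple_demo(5, 27, 1u << 13);
	return 0;
}

For first = 5 and len = 27 this yields chunks of 1, 2, 8 and 16 blocks, each starting on a multiple of its own size.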
@@ -2381,7 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
        /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
         * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
         * So a two level scheme suffices for now. */
-       sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
+       sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
        if (sbi->s_group_info == NULL) {
                printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
                return -ENOMEM;
@@ -3208,7 +3213,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
        cur_distance = abs(goal_block - cpa->pa_pstart);
        new_distance = abs(goal_block - pa->pa_pstart);
 
-       if (cur_distance < new_distance)
+       if (cur_distance <= new_distance)
                return cpa;
 
        /* drop the previous reference */
@@ -3907,7 +3912,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
        struct super_block *sb = ac->ac_sb;
        ext4_group_t ngroups, i;
 
-       if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+       if (!mb_enable_debug ||
+           (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
                return;
 
        printk(KERN_ERR "EXT4-fs: Can't allocate:"
@@ -4753,7 +4759,8 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
  * bitmap. Then issue a TRIM command on this extent and free the extent in
  * the group buddy bitmap. This is done until whole group is scanned.
  */
-ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
+static ext4_grpblk_t
+ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
                ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
 {
        void *bitmap;
@@ -4863,10 +4870,15 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
                        break;
                }
 
-               if (len >= EXT4_BLOCKS_PER_GROUP(sb))
-                       len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
-               else
+               /*
+                * For all groups except the last one, the last block is
+                * always EXT4_BLOCKS_PER_GROUP(sb), so we only need to
+                * change it for the last group, in which case
+                * first_block + len < EXT4_BLOCKS_PER_GROUP(sb).
+                */
+               if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
                        last_block = first_block + len;
+               len -= last_block - first_block;
 
                if (e4b.bd_info->bb_free >= minlen) {
                        cnt = ext4_trim_all_free(sb, &e4b, first_block,
index b619322c76f0c5ea56d831b787d77e2911657a25..22bd4d7f289b834b277fb55ce92cefa019c9d354 100644 (file)
@@ -169,7 +169,7 @@ struct ext4_allocation_context {
        /* original request */
        struct ext4_free_extent ac_o_ex;
 
-       /* goal request (after normalization) */
+       /* goal request (normalized ac_o_ex) */
        struct ext4_free_extent ac_g_ex;
 
        /* the best found extent */
index b0a126f23c20cd70f662834411fdf870910a575f..d1bafa57f48367d7403ba4ef74734ece57349778 100644 (file)
@@ -263,7 +263,7 @@ static int free_dind_blocks(handle_t *handle,
        for (i = 0; i < max_entries; i++) {
                if (tmp_idata[i]) {
                        extend_credit_for_blkdel(handle, inode);
-                       ext4_free_blocks(handle, inode, 0,
+                       ext4_free_blocks(handle, inode, NULL,
                                         le32_to_cpu(tmp_idata[i]), 1,
                                         EXT4_FREE_BLOCKS_METADATA |
                                         EXT4_FREE_BLOCKS_FORGET);
@@ -271,7 +271,7 @@ static int free_dind_blocks(handle_t *handle,
        }
        put_bh(bh);
        extend_credit_for_blkdel(handle, inode);
-       ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+       ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
                         EXT4_FREE_BLOCKS_METADATA |
                         EXT4_FREE_BLOCKS_FORGET);
        return 0;
@@ -302,7 +302,7 @@ static int free_tind_blocks(handle_t *handle,
        }
        put_bh(bh);
        extend_credit_for_blkdel(handle, inode);
-       ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+       ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
                         EXT4_FREE_BLOCKS_METADATA |
                         EXT4_FREE_BLOCKS_FORGET);
        return 0;
@@ -315,7 +315,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
        /* ei->i_data[EXT4_IND_BLOCK] */
        if (i_data[0]) {
                extend_credit_for_blkdel(handle, inode);
-               ext4_free_blocks(handle, inode, 0,
+               ext4_free_blocks(handle, inode, NULL,
                                le32_to_cpu(i_data[0]), 1,
                                 EXT4_FREE_BLOCKS_METADATA |
                                 EXT4_FREE_BLOCKS_FORGET);
@@ -428,7 +428,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
        }
        put_bh(bh);
        extend_credit_for_blkdel(handle, inode);
-       ext4_free_blocks(handle, inode, 0, block, 1,
+       ext4_free_blocks(handle, inode, NULL, block, 1,
                         EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return retval;
 }
index e781b7ea56305dfde5c7458c78294a5dcc6d9361..67fd0b0258589ae64428d26530807b898e79854b 100644 (file)
@@ -40,6 +40,7 @@
 #include "xattr.h"
 #include "acl.h"
 
+#include <trace/events/ext4.h>
 /*
  * define how far ahead to read directories while searching them.
  */
@@ -2183,6 +2184,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
        struct ext4_dir_entry_2 *de;
        handle_t *handle;
 
+       trace_ext4_unlink_enter(dir, dentry);
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
        dquot_initialize(dir);
@@ -2228,6 +2230,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 end_unlink:
        ext4_journal_stop(handle);
        brelse(bh);
+       trace_ext4_unlink_exit(dentry, retval);
        return retval;
 }
 
@@ -2402,6 +2405,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                if (!new_inode && new_dir != old_dir &&
                    EXT4_DIR_LINK_MAX(new_dir))
                        goto end_rename;
+               BUFFER_TRACE(dir_bh, "get_write_access");
+               retval = ext4_journal_get_write_access(handle, dir_bh);
+               if (retval)
+                       goto end_rename;
        }
        if (!new_bh) {
                retval = ext4_add_entry(handle, new_dentry, old_inode);
@@ -2409,7 +2416,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                        goto end_rename;
        } else {
                BUFFER_TRACE(new_bh, "get write access");
-               ext4_journal_get_write_access(handle, new_bh);
+               retval = ext4_journal_get_write_access(handle, new_bh);
+               if (retval)
+                       goto end_rename;
                new_de->inode = cpu_to_le32(old_inode->i_ino);
                if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
                                              EXT4_FEATURE_INCOMPAT_FILETYPE))
@@ -2470,8 +2479,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
        old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
        ext4_update_dx_flag(old_dir);
        if (dir_bh) {
-               BUFFER_TRACE(dir_bh, "get_write_access");
-               ext4_journal_get_write_access(handle, dir_bh);
                PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
                                                cpu_to_le32(new_dir->i_ino);
                BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
index e2cd90e4bb7c9e20cd0c2d274ac6f0b372eda5ba..b6dbd056fcb1d7f532f428e34cae4ef5248680ce 100644 (file)
@@ -259,6 +259,11 @@ static void ext4_end_bio(struct bio *bio, int error)
                             bi_sector >> (inode->i_blkbits - 9));
        }
 
+       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               ext4_free_io_end(io_end);
+               return;
+       }
+
        /* Add the io_end to per-inode completed io list*/
        spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
        list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
@@ -279,9 +284,9 @@ void ext4_io_submit(struct ext4_io_submit *io)
                BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
                bio_put(io->io_bio);
        }
-       io->io_bio = 0;
+       io->io_bio = NULL;
        io->io_op = 0;
-       io->io_end = 0;
+       io->io_end = NULL;
 }
 
 static int io_submit_init(struct ext4_io_submit *io,
@@ -380,8 +385,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 
        BUG_ON(!PageLocked(page));
        BUG_ON(PageWriteback(page));
-       set_page_writeback(page);
-       ClearPageError(page);
 
        io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
        if (!io_page) {
@@ -392,6 +395,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
        io_page->p_page = page;
        atomic_set(&io_page->p_count, 1);
        get_page(page);
+       set_page_writeback(page);
+       ClearPageError(page);
 
        for (bh = head = page_buffers(page), block_start = 0;
             bh != head || !block_start;
index 3ecc6e45d2f93568c5ccc298058a0d859a1768e5..80bbc9c60c247659047b805c9a74155bf2baeb14 100644 (file)
@@ -230,7 +230,7 @@ static int setup_new_group_blocks(struct super_block *sb,
        }
 
        /* Zero out all of the reserved backup group descriptor table blocks */
-       ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+       ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
                        block, sbi->s_itb_per_group);
        err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
                               GFP_NOFS);
@@ -248,7 +248,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 
        /* Zero out all of the inode table blocks */
        block = input->inode_table;
-       ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+       ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
                        block, sbi->s_itb_per_group);
        err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
        if (err)
@@ -499,12 +499,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
        return err;
 
 exit_inode:
-       /* ext4_journal_release_buffer(handle, iloc.bh); */
+       /* ext4_handle_release_buffer(handle, iloc.bh); */
        brelse(iloc.bh);
 exit_dindj:
-       /* ext4_journal_release_buffer(handle, dind); */
+       /* ext4_handle_release_buffer(handle, dind); */
 exit_sbh:
-       /* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
+       /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
 exit_dind:
        brelse(dind);
 exit_bh:
@@ -586,7 +586,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
                        /*
                        int j;
                        for (j = 0; j < i; j++)
-                               ext4_journal_release_buffer(handle, primary[j]);
+                               ext4_handle_release_buffer(handle, primary[j]);
                         */
                        goto exit_bh;
                }
index 203f9e4a70be3afe974d492d9ca3c7f5c089738f..22546ad7f0aea7d2e5b6215c89eb9f24f1537c14 100644 (file)
@@ -54,9 +54,9 @@
 
 static struct proc_dir_entry *ext4_proc_root;
 static struct kset *ext4_kset;
-struct ext4_lazy_init *ext4_li_info;
-struct mutex ext4_li_mtx;
-struct ext4_features *ext4_feat;
+static struct ext4_lazy_init *ext4_li_info;
+static struct mutex ext4_li_mtx;
+static struct ext4_features *ext4_feat;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
@@ -75,6 +75,7 @@ static void ext4_write_super(struct super_block *sb);
 static int ext4_freeze(struct super_block *sb);
 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
                       const char *dev_name, void *data);
+static int ext4_feature_set_ok(struct super_block *sb, int readonly);
 static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
 static void ext4_clear_request_list(void);
@@ -594,7 +595,7 @@ __acquires(bitlock)
 
        vaf.fmt = fmt;
        vaf.va = &args;
-       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
+       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
               sb->s_id, function, line, grp);
        if (ino)
                printk(KERN_CONT "inode %lu: ", ino);
@@ -997,13 +998,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
        if (test_opt(sb, OLDALLOC))
                seq_puts(seq, ",oldalloc");
 #ifdef CONFIG_EXT4_FS_XATTR
-       if (test_opt(sb, XATTR_USER) &&
-               !(def_mount_opts & EXT4_DEFM_XATTR_USER))
+       if (test_opt(sb, XATTR_USER))
                seq_puts(seq, ",user_xattr");
-       if (!test_opt(sb, XATTR_USER) &&
-           (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
+       if (!test_opt(sb, XATTR_USER))
                seq_puts(seq, ",nouser_xattr");
-       }
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
        if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
@@ -1041,8 +1039,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
            !(def_mount_opts & EXT4_DEFM_NODELALLOC))
                seq_puts(seq, ",nodelalloc");
 
-       if (test_opt(sb, MBLK_IO_SUBMIT))
-               seq_puts(seq, ",mblk_io_submit");
+       if (!test_opt(sb, MBLK_IO_SUBMIT))
+               seq_puts(seq, ",nomblk_io_submit");
        if (sbi->s_stripe)
                seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
        /*
@@ -1451,7 +1449,7 @@ static int parse_options(char *options, struct super_block *sb,
                 * Initialize args struct so we know whether arg was
                 * found; some options take optional arguments.
                 */
-               args[0].to = args[0].from = 0;
+               args[0].to = args[0].from = NULL;
                token = match_token(p, tokens, args);
                switch (token) {
                case Opt_bsd_df:
@@ -1771,7 +1769,7 @@ set_qf_format:
                                return 0;
                        if (option < 0 || option > (1 << 30))
                                return 0;
-                       if (!is_power_of_2(option)) {
+                       if (option && !is_power_of_2(option)) {
                                ext4_msg(sb, KERN_ERR,
                                         "EXT4-fs: inode_readahead_blks"
                                         " must be a power of 2");
@@ -2120,6 +2118,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                return;
        }
 
+       /* Check if feature set would not allow a r/w mount */
+       if (!ext4_feature_set_ok(sb, 0)) {
+               ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
+                        "unknown ROCOMPAT features");
+               return;
+       }
+
        if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
                if (es->s_last_orphan)
                        jbd_debug(1, "Errors on filesystem, "
@@ -2412,7 +2417,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
        if (parse_strtoul(buf, 0x40000000, &t))
                return -EINVAL;
 
-       if (!is_power_of_2(t))
+       if (t && !is_power_of_2(t))
                return -EINVAL;
 
        sbi->s_inode_readahead_blks = t;
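Both inode_readahead_blks hunks above now accept 0 (which disables the readahead) while still rejecting values that are not powers of two. The kernel's is_power_of_2() reduces to the usual bit trick; a userspace rendering for reference:

#include <stdbool.h>
#include <stdio.h>

/* Same truth table as the kernel helper: true for 1, 2, 4, ... and false
 * for 0, which is why the patch adds the explicit "option &&" / "t &&" guard. */
static bool is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
	unsigned long vals[] = { 0, 1, 3, 32, 0x40000000 };
	unsigned int i;

	for (i = 0; i < sizeof(vals) / sizeof(vals[0]); i++)
		printf("%lu -> %d\n", vals[i], is_power_of_2(vals[i]));
	return 0;
}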
@@ -3095,14 +3100,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        }
        if (def_mount_opts & EXT4_DEFM_UID16)
                set_opt(sb, NO_UID32);
+       /* xattr user namespace & acls are now defaulted on */
 #ifdef CONFIG_EXT4_FS_XATTR
-       if (def_mount_opts & EXT4_DEFM_XATTR_USER)
-               set_opt(sb, XATTR_USER);
+       set_opt(sb, XATTR_USER);
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
-       if (def_mount_opts & EXT4_DEFM_ACL)
-               set_opt(sb, POSIX_ACL);
+       set_opt(sb, POSIX_ACL);
 #endif
+       set_opt(sb, MBLK_IO_SUBMIT);
        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
                set_opt(sb, JOURNAL_DATA);
        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3516,7 +3521,7 @@ no_journal:
         * concurrency isn't really necessary.  Limit it to 1.
         */
        EXT4_SB(sb)->dio_unwritten_wq =
-               alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
+               alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
        if (!EXT4_SB(sb)->dio_unwritten_wq) {
                printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
                goto failed_mount_wq;
@@ -3531,17 +3536,16 @@ no_journal:
        if (IS_ERR(root)) {
                ext4_msg(sb, KERN_ERR, "get root inode failed");
                ret = PTR_ERR(root);
+               root = NULL;
                goto failed_mount4;
        }
        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
-               iput(root);
                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
                goto failed_mount4;
        }
        sb->s_root = d_alloc_root(root);
        if (!sb->s_root) {
                ext4_msg(sb, KERN_ERR, "get root dentry failed");
-               iput(root);
                ret = -ENOMEM;
                goto failed_mount4;
        }
@@ -3657,6 +3661,8 @@ cantfind_ext4:
        goto failed_mount;
 
 failed_mount4:
+       iput(root);
+       sb->s_root = NULL;
        ext4_msg(sb, KERN_ERR, "mount failed");
        destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
 failed_mount_wq:
index fc32176eee39ede67320bebeb44a80812d7d0f21..b545ca1c459c42e2cc6aef35ce49dea3fcd2694d 100644 (file)
@@ -735,7 +735,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
                        int offset = (char *)s->here - bs->bh->b_data;
 
                        unlock_buffer(bs->bh);
-                       jbd2_journal_release_buffer(handle, bs->bh);
+                       ext4_handle_release_buffer(handle, bs->bh);
                        if (ce) {
                                mb_cache_entry_release(ce);
                                ce = NULL;
@@ -833,7 +833,7 @@ inserted:
                        new_bh = sb_getblk(sb, block);
                        if (!new_bh) {
 getblk_failed:
-                               ext4_free_blocks(handle, inode, 0, block, 1,
+                               ext4_free_blocks(handle, inode, NULL, block, 1,
                                                 EXT4_FREE_BLOCKS_METADATA);
                                error = -EIO;
                                goto cleanup;
index 27e79c27ba0869685d128a6b31377601a4585598..a32dcaec04e147917a3e085a5e83146189782336 100644 (file)
@@ -432,13 +432,35 @@ struct jbd2_journal_handle
        int                     h_err;
 
        /* Flags [no locking] */
-       unsigned int    h_sync:         1;      /* sync-on-close */
-       unsigned int    h_jdata:        1;      /* force data journaling */
-       unsigned int    h_aborted:      1;      /* fatal error on handle */
+       unsigned int    h_sync:1;       /* sync-on-close */
+       unsigned int    h_jdata:1;      /* force data journaling */
+       unsigned int    h_aborted:1;    /* fatal error on handle */
+       unsigned int    h_cowing:1;     /* COWing block to snapshot */
+
+       /* Number of buffers requested by user:
+        * (before adding the COW credits factor) */
+       unsigned int    h_base_credits:14;
+
+       /* Number of buffers the user is allowed to dirty:
+        * (counts only buffers dirtied when !h_cowing) */
+       unsigned int    h_user_credits:14;
+
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        struct lockdep_map      h_lockdep_map;
 #endif
+
+#ifdef CONFIG_JBD2_DEBUG
+       /* COW debugging counters: */
+       unsigned int h_cow_moved; /* blocks moved to snapshot */
+       unsigned int h_cow_copied; /* blocks copied to snapshot */
+       unsigned int h_cow_ok_jh; /* blocks already COWed during current
+                                    transaction */
+       unsigned int h_cow_ok_bitmap; /* blocks not set in COW bitmap */
+       unsigned int h_cow_ok_mapped; /* blocks already mapped in snapshot */
+       unsigned int h_cow_bitmaps; /* COW bitmaps created */
+       unsigned int h_cow_excluded; /* blocks set in exclude bitmap */
+#endif
 };
 
 
index 525aac3c97dfeca86b1a6801d07aec59c9e8599d..44e95d0a721f1eb50db7e41df52a15403ddf814d 100644 (file)
@@ -40,6 +40,13 @@ struct journal_head {
         */
        unsigned b_modified;
 
+       /*
+        * This field tracks the last transaction id in which this buffer
+        * has been COWed
+        * [jbd_lock_bh_state()]
+        */
+       unsigned b_cow_tid;
+
        /*
         * Copy of the buffer data frozen for writing to the log.
         * [jbd_lock_bh_state()]
index e5e345fb2a5c37db45de06bcdf02f3796aec64c4..e09592d2f916adfdf041272e3b7a672379f7eea1 100644 (file)
@@ -21,8 +21,7 @@ TRACE_EVENT(ext4_free_inode,
        TP_ARGS(inode),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        umode_t, mode                   )
                __field(        uid_t,  uid                     )
@@ -31,8 +30,7 @@ TRACE_EVENT(ext4_free_inode,
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->mode   = inode->i_mode;
                __entry->uid    = inode->i_uid;
@@ -41,9 +39,9 @@ TRACE_EVENT(ext4_free_inode,
        ),
 
        TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu",
-                 __entry->dev_major, __entry->dev_minor,
-                 (unsigned long) __entry->ino, __entry->mode,
-                 __entry->uid, __entry->gid,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->mode, __entry->uid, __entry->gid,
                  (unsigned long long) __entry->blocks)
 );
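The tracepoints now record the packed dev_t and split it with MAJOR()/MINOR() only at print time, instead of storing two ints per event. The same decode works in userspace; a small sketch (the 8,3 device number is made up):

#include <stdio.h>
#include <sys/types.h>
#include <sys/sysmacros.h>	/* makedev(), major(), minor() */

int main(void)
{
	dev_t dev = makedev(8, 3);	/* e.g. a hypothetical /dev/sda3 */

	printf("dev %u,%u\n", major(dev), minor(dev));
	return 0;
}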
 
@@ -53,21 +51,19 @@ TRACE_EVENT(ext4_request_inode,
        TP_ARGS(dir, mode),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  dir                     )
                __field(        umode_t, mode                   )
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(dir->i_sb->s_dev);
-               __entry->dev_minor = MINOR(dir->i_sb->s_dev);
+               __entry->dev    = dir->i_sb->s_dev;
                __entry->dir    = dir->i_ino;
                __entry->mode   = mode;
        ),
 
        TP_printk("dev %d,%d dir %lu mode 0%o",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->dir, __entry->mode)
 );
 
@@ -77,23 +73,21 @@ TRACE_EVENT(ext4_allocate_inode,
        TP_ARGS(inode, dir, mode),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        ino_t,  dir                     )
                __field(        umode_t, mode                   )
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->dir    = dir->i_ino;
                __entry->mode   = mode;
        ),
 
        TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  (unsigned long) __entry->dir, __entry->mode)
 );
@@ -104,21 +98,19 @@ TRACE_EVENT(ext4_evict_inode,
        TP_ARGS(inode),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        int,    nlink                   )
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->nlink  = inode->i_nlink;
        ),
 
        TP_printk("dev %d,%d ino %lu nlink %d",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino, __entry->nlink)
 );
 
@@ -128,21 +120,19 @@ TRACE_EVENT(ext4_drop_inode,
        TP_ARGS(inode, drop),
 
        TP_STRUCT__entry(
-               __field(        int,    dev_major               )
-               __field(        int,    dev_minor               )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        int,    drop                    )
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->drop   = drop;
        ),
 
        TP_printk("dev %d,%d ino %lu drop %d",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino, __entry->drop)
 );
 
@@ -152,21 +142,19 @@ TRACE_EVENT(ext4_mark_inode_dirty,
        TP_ARGS(inode, IP),
 
        TP_STRUCT__entry(
-               __field(        int,    dev_major               )
-               __field(        int,    dev_minor               )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(unsigned long,  ip                      )
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->ip     = IP;
        ),
 
        TP_printk("dev %d,%d ino %lu caller %pF",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino, (void *)__entry->ip)
 );
 
@@ -176,21 +164,19 @@ TRACE_EVENT(ext4_begin_ordered_truncate,
        TP_ARGS(inode, new_size),
 
        TP_STRUCT__entry(
-               __field(        int,    dev_major               )
-               __field(        int,    dev_minor               )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        loff_t, new_size                )
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor      = MINOR(inode->i_sb->s_dev);
+               __entry->dev            = inode->i_sb->s_dev;
                __entry->ino            = inode->i_ino;
                __entry->new_size       = new_size;
        ),
 
        TP_printk("dev %d,%d ino %lu new_size %lld",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  (long long) __entry->new_size)
 );
@@ -203,8 +189,7 @@ DECLARE_EVENT_CLASS(ext4__write_begin,
        TP_ARGS(inode, pos, len, flags),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        loff_t, pos                     )
                __field(        unsigned int, len               )
@@ -212,8 +197,7 @@ DECLARE_EVENT_CLASS(ext4__write_begin,
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->pos    = pos;
                __entry->len    = len;
@@ -221,7 +205,7 @@ DECLARE_EVENT_CLASS(ext4__write_begin,
        ),
 
        TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->pos, __entry->len, __entry->flags)
 );
@@ -249,8 +233,7 @@ DECLARE_EVENT_CLASS(ext4__write_end,
        TP_ARGS(inode, pos, len, copied),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        loff_t, pos                     )
                __field(        unsigned int, len               )
@@ -258,8 +241,7 @@ DECLARE_EVENT_CLASS(ext4__write_end,
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->pos    = pos;
                __entry->len    = len;
@@ -267,9 +249,9 @@ DECLARE_EVENT_CLASS(ext4__write_end,
        ),
 
        TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
-                 __entry->dev_major, __entry->dev_minor,
-                 (unsigned long) __entry->ino, __entry->pos,
-                 __entry->len, __entry->copied)
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->pos, __entry->len, __entry->copied)
 );
 
 DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
@@ -310,22 +292,20 @@ TRACE_EVENT(ext4_writepage,
        TP_ARGS(inode, page),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        pgoff_t, index                  )
 
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->index  = page->index;
        ),
 
        TP_printk("dev %d,%d ino %lu page_index %lu",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino, __entry->index)
 );
 
@@ -335,43 +315,39 @@ TRACE_EVENT(ext4_da_writepages,
        TP_ARGS(inode, wbc),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        long,   nr_to_write             )
                __field(        long,   pages_skipped           )
                __field(        loff_t, range_start             )
                __field(        loff_t, range_end               )
+               __field(        int,    sync_mode               )
                __field(        char,   for_kupdate             )
-               __field(        char,   for_reclaim             )
                __field(        char,   range_cyclic            )
                __field(       pgoff_t, writeback_index         )
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor      = MINOR(inode->i_sb->s_dev);
+               __entry->dev            = inode->i_sb->s_dev;
                __entry->ino            = inode->i_ino;
                __entry->nr_to_write    = wbc->nr_to_write;
                __entry->pages_skipped  = wbc->pages_skipped;
                __entry->range_start    = wbc->range_start;
                __entry->range_end      = wbc->range_end;
+               __entry->sync_mode      = wbc->sync_mode;
                __entry->for_kupdate    = wbc->for_kupdate;
-               __entry->for_reclaim    = wbc->for_reclaim;
                __entry->range_cyclic   = wbc->range_cyclic;
                __entry->writeback_index = inode->i_mapping->writeback_index;
        ),
 
        TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
-                 "range_start %llu range_end %llu "
-                 "for_kupdate %d for_reclaim %d "
-                 "range_cyclic %d writeback_index %lu",
-                 __entry->dev_major, __entry->dev_minor,
+                 "range_start %llu range_end %llu sync_mode %d "
+                 "for_kupdate %d range_cyclic %d writeback_index %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino, __entry->nr_to_write,
                  __entry->pages_skipped, __entry->range_start,
-                 __entry->range_end,
-                 __entry->for_kupdate, __entry->for_reclaim,
-                 __entry->range_cyclic,
+                 __entry->range_end, __entry->sync_mode,
+                 __entry->for_kupdate, __entry->range_cyclic,
                  (unsigned long) __entry->writeback_index)
 );
 
@@ -381,8 +357,7 @@ TRACE_EVENT(ext4_da_write_pages,
        TP_ARGS(inode, mpd),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        __u64,  b_blocknr               )
                __field(        __u32,  b_size                  )
@@ -390,11 +365,11 @@ TRACE_EVENT(ext4_da_write_pages,
                __field(        unsigned long,  first_page      )
                __field(        int,    io_done                 )
                __field(        int,    pages_written           )
+               __field(        int,    sync_mode               )
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor      = MINOR(inode->i_sb->s_dev);
+               __entry->dev            = inode->i_sb->s_dev;
                __entry->ino            = inode->i_ino;
                __entry->b_blocknr      = mpd->b_blocknr;
                __entry->b_size         = mpd->b_size;
@@ -402,14 +377,18 @@ TRACE_EVENT(ext4_da_write_pages,
                __entry->first_page     = mpd->first_page;
                __entry->io_done        = mpd->io_done;
                __entry->pages_written  = mpd->pages_written;
+               __entry->sync_mode      = mpd->wbc->sync_mode;
        ),
 
-       TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d",
-                 __entry->dev_major, __entry->dev_minor,
+       TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x "
+                 "first_page %lu io_done %d pages_written %d sync_mode %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->b_blocknr, __entry->b_size,
                  __entry->b_state, __entry->first_page,
-                 __entry->io_done, __entry->pages_written)
+                 __entry->io_done, __entry->pages_written,
+                 __entry->sync_mode)
 );
 
 TRACE_EVENT(ext4_da_writepages_result,
@@ -419,35 +398,100 @@ TRACE_EVENT(ext4_da_writepages_result,
        TP_ARGS(inode, wbc, ret, pages_written),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        int,    ret                     )
                __field(        int,    pages_written           )
                __field(        long,   pages_skipped           )
+               __field(        int,    sync_mode               )
                __field(        char,   more_io                 )       
                __field(       pgoff_t, writeback_index         )
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor      = MINOR(inode->i_sb->s_dev);
+               __entry->dev            = inode->i_sb->s_dev;
                __entry->ino            = inode->i_ino;
                __entry->ret            = ret;
                __entry->pages_written  = pages_written;
                __entry->pages_skipped  = wbc->pages_skipped;
+               __entry->sync_mode      = wbc->sync_mode;
                __entry->more_io        = wbc->more_io;
                __entry->writeback_index = inode->i_mapping->writeback_index;
        ),
 
-       TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu",
-                 __entry->dev_major, __entry->dev_minor,
+       TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld "
+                 "more_io %d sync_mode %d writeback_index %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino, __entry->ret,
                  __entry->pages_written, __entry->pages_skipped,
-                 __entry->more_io,
+                 __entry->more_io, __entry->sync_mode,
                  (unsigned long) __entry->writeback_index)
 );
 
+DECLARE_EVENT_CLASS(ext4__page_op,
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page),
+
+       TP_STRUCT__entry(
+               __field(        pgoff_t, index                  )
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+
+       ),
+
+       TP_fast_assign(
+               __entry->index  = page->index;
+               __entry->ino    = page->mapping->host->i_ino;
+               __entry->dev    = page->mapping->host->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu page_index %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->index)
+);
+
+DEFINE_EVENT(ext4__page_op, ext4_readpage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext4__page_op, ext4_releasepage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+TRACE_EVENT(ext4_invalidatepage,
+       TP_PROTO(struct page *page, unsigned long offset),
+
+       TP_ARGS(page, offset),
+
+       TP_STRUCT__entry(
+               __field(        pgoff_t, index                  )
+               __field(        unsigned long, offset           )
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+
+       ),
+
+       TP_fast_assign(
+               __entry->index  = page->index;
+               __entry->offset = offset;
+               __entry->ino    = page->mapping->host->i_ino;
+               __entry->dev    = page->mapping->host->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->index, __entry->offset)
+);
+
 TRACE_EVENT(ext4_discard_blocks,
        TP_PROTO(struct super_block *sb, unsigned long long blk,
                        unsigned long long count),
@@ -455,22 +499,20 @@ TRACE_EVENT(ext4_discard_blocks,
        TP_ARGS(sb, blk, count),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        __u64,  blk                     )
                __field(        __u64,  count                   )
 
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(sb->s_dev);
-               __entry->dev_minor = MINOR(sb->s_dev);
+               __entry->dev    = sb->s_dev;
                __entry->blk    = blk;
                __entry->count  = count;
        ),
 
        TP_printk("dev %d,%d blk %llu count %llu",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->blk, __entry->count)
 );
 
@@ -481,8 +523,7 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa,
        TP_ARGS(ac, pa),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        __u64,  pa_pstart               )
                __field(        __u32,  pa_len                  )
@@ -491,8 +532,7 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa,
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(ac->ac_sb->s_dev);
-               __entry->dev_minor      = MINOR(ac->ac_sb->s_dev);
+               __entry->dev            = ac->ac_sb->s_dev;
                __entry->ino            = ac->ac_inode->i_ino;
                __entry->pa_pstart      = pa->pa_pstart;
                __entry->pa_len         = pa->pa_len;
@@ -500,9 +540,9 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa,
        ),
 
        TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu",
-                 __entry->dev_major, __entry->dev_minor,
-                 (unsigned long) __entry->ino, __entry->pa_pstart,
-                 __entry->pa_len, __entry->pa_lstart)
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
 );
 
 DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa,
@@ -530,8 +570,7 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
        TP_ARGS(sb, inode, pa, block, count),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        __u64,  block                   )
                __field(        __u32,  count                   )
@@ -539,16 +578,16 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(sb->s_dev);
-               __entry->dev_minor      = MINOR(sb->s_dev);
+               __entry->dev            = sb->s_dev;
                __entry->ino            = inode->i_ino;
                __entry->block          = block;
                __entry->count          = count;
        ),
 
        TP_printk("dev %d,%d ino %lu block %llu count %u",
-                 __entry->dev_major, __entry->dev_minor,
-                 (unsigned long) __entry->ino, __entry->block, __entry->count)
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->block, __entry->count)
 );
 
 TRACE_EVENT(ext4_mb_release_group_pa,
@@ -558,22 +597,20 @@ TRACE_EVENT(ext4_mb_release_group_pa,
        TP_ARGS(sb, pa),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        __u64,  pa_pstart               )
                __field(        __u32,  pa_len                  )
 
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(sb->s_dev);
-               __entry->dev_minor      = MINOR(sb->s_dev);
+               __entry->dev            = sb->s_dev;
                __entry->pa_pstart      = pa->pa_pstart;
                __entry->pa_len         = pa->pa_len;
        ),
 
        TP_printk("dev %d,%d pstart %llu len %u",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->pa_pstart, __entry->pa_len)
 );
 
@@ -583,20 +620,18 @@ TRACE_EVENT(ext4_discard_preallocations,
        TP_ARGS(inode),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
 
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
        ),
 
        TP_printk("dev %d,%d ino %lu",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino)
 );
 
@@ -606,20 +641,19 @@ TRACE_EVENT(ext4_mb_discard_preallocations,
        TP_ARGS(sb, needed),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        int,    needed                  )
 
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(sb->s_dev);
-               __entry->dev_minor = MINOR(sb->s_dev);
+               __entry->dev    = sb->s_dev;
                __entry->needed = needed;
        ),
 
        TP_printk("dev %d,%d needed %d",
-                 __entry->dev_major, __entry->dev_minor, __entry->needed)
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->needed)
 );
 
 TRACE_EVENT(ext4_request_blocks,
@@ -628,8 +662,7 @@ TRACE_EVENT(ext4_request_blocks,
        TP_ARGS(ar),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        unsigned int, flags             )
                __field(        unsigned int, len               )
@@ -642,8 +675,7 @@ TRACE_EVENT(ext4_request_blocks,
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
+               __entry->dev    = ar->inode->i_sb->s_dev;
                __entry->ino    = ar->inode->i_ino;
                __entry->flags  = ar->flags;
                __entry->len    = ar->len;
@@ -655,8 +687,9 @@ TRACE_EVENT(ext4_request_blocks,
                __entry->pright = ar->pright;
        ),
 
-       TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
-                 __entry->dev_major, __entry->dev_minor,
+       TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu "
+                 "lleft %llu lright %llu pleft %llu pright %llu ",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->flags, __entry->len,
                  (unsigned long long) __entry->logical,
@@ -673,8 +706,7 @@ TRACE_EVENT(ext4_allocate_blocks,
        TP_ARGS(ar, block),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        __u64,  block                   )
                __field(        unsigned int, flags             )
@@ -688,8 +720,7 @@ TRACE_EVENT(ext4_allocate_blocks,
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
+               __entry->dev    = ar->inode->i_sb->s_dev;
                __entry->ino    = ar->inode->i_ino;
                __entry->block  = block;
                __entry->flags  = ar->flags;
@@ -702,10 +733,11 @@ TRACE_EVENT(ext4_allocate_blocks,
                __entry->pright = ar->pright;
        ),
 
-       TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
-                 __entry->dev_major, __entry->dev_minor,
-                 (unsigned long) __entry->ino, __entry->flags,
-                 __entry->len, __entry->block,
+       TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu "
+                 "goal %llu lleft %llu lright %llu pleft %llu pright %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->flags, __entry->len, __entry->block,
                  (unsigned long long) __entry->logical,
                  (unsigned long long) __entry->goal,
                  (unsigned long long) __entry->lleft,
@@ -721,8 +753,7 @@ TRACE_EVENT(ext4_free_blocks,
        TP_ARGS(inode, block, count, flags),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(      umode_t, mode                     )
                __field(        __u64,  block                   )
@@ -731,8 +762,7 @@ TRACE_EVENT(ext4_free_blocks,
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor      = MINOR(inode->i_sb->s_dev);
+               __entry->dev            = inode->i_sb->s_dev;
                __entry->ino            = inode->i_ino;
                __entry->mode           = inode->i_mode;
                __entry->block          = block;
@@ -741,20 +771,19 @@ TRACE_EVENT(ext4_free_blocks,
        ),
 
        TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->mode, __entry->block, __entry->count,
                  __entry->flags)
 );
 
-TRACE_EVENT(ext4_sync_file,
+TRACE_EVENT(ext4_sync_file_enter,
        TP_PROTO(struct file *file, int datasync),
 
        TP_ARGS(file, datasync),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        ino_t,  parent                  )
                __field(        int,    datasync                )
@@ -763,39 +792,60 @@ TRACE_EVENT(ext4_sync_file,
        TP_fast_assign(
                struct dentry *dentry = file->f_path.dentry;
 
-               __entry->dev_major      = MAJOR(dentry->d_inode->i_sb->s_dev);
-               __entry->dev_minor      = MINOR(dentry->d_inode->i_sb->s_dev);
+               __entry->dev            = dentry->d_inode->i_sb->s_dev;
                __entry->ino            = dentry->d_inode->i_ino;
                __entry->datasync       = datasync;
                __entry->parent         = dentry->d_parent->d_inode->i_ino;
        ),
 
        TP_printk("dev %d,%d ino %ld parent %ld datasync %d ",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  (unsigned long) __entry->parent, __entry->datasync)
 );
 
+TRACE_EVENT(ext4_sync_file_exit,
+       TP_PROTO(struct inode *inode, int ret),
+
+       TP_ARGS(inode, ret),
+
+       TP_STRUCT__entry(
+               __field(        int,    ret                     )
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+       ),
+
+       TP_fast_assign(
+               __entry->ret            = ret;
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %ld ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->ret)
+);
+
 TRACE_EVENT(ext4_sync_fs,
        TP_PROTO(struct super_block *sb, int wait),
 
        TP_ARGS(sb, wait),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        int,    wait                    )
 
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(sb->s_dev);
-               __entry->dev_minor = MINOR(sb->s_dev);
+               __entry->dev    = sb->s_dev;
                __entry->wait   = wait;
        ),
 
-       TP_printk("dev %d,%d wait %d", __entry->dev_major,
-                 __entry->dev_minor, __entry->wait)
+       TP_printk("dev %d,%d wait %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->wait)
 );
 
 TRACE_EVENT(ext4_alloc_da_blocks,
@@ -804,23 +854,21 @@ TRACE_EVENT(ext4_alloc_da_blocks,
        TP_ARGS(inode),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field( unsigned int,  data_blocks     )
                __field( unsigned int,  meta_blocks     )
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
                __entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
        ),
 
        TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->data_blocks, __entry->meta_blocks)
 );
@@ -831,8 +879,7 @@ TRACE_EVENT(ext4_mballoc_alloc,
        TP_ARGS(ac),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        __u16,  found                   )
                __field(        __u16,  groups                  )
@@ -855,8 +902,7 @@ TRACE_EVENT(ext4_mballoc_alloc,
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(ac->ac_inode->i_sb->s_dev);
-               __entry->dev_minor      = MINOR(ac->ac_inode->i_sb->s_dev);
+               __entry->dev            = ac->ac_inode->i_sb->s_dev;
                __entry->ino            = ac->ac_inode->i_ino;
                __entry->found          = ac->ac_found;
                __entry->flags          = ac->ac_flags;
@@ -881,7 +927,7 @@ TRACE_EVENT(ext4_mballoc_alloc,
        TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
                  "result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x "
                  "tail %u broken %u",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->orig_group, __entry->orig_start,
                  __entry->orig_len, __entry->orig_logical,
@@ -900,8 +946,7 @@ TRACE_EVENT(ext4_mballoc_prealloc,
        TP_ARGS(ac),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        __u32,  orig_logical            )
                __field(          int,  orig_start              )
@@ -914,8 +959,7 @@ TRACE_EVENT(ext4_mballoc_prealloc,
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(ac->ac_inode->i_sb->s_dev);
-               __entry->dev_minor      = MINOR(ac->ac_inode->i_sb->s_dev);
+               __entry->dev            = ac->ac_inode->i_sb->s_dev;
                __entry->ino            = ac->ac_inode->i_ino;
                __entry->orig_logical   = ac->ac_o_ex.fe_logical;
                __entry->orig_start     = ac->ac_o_ex.fe_start;
@@ -928,7 +972,7 @@ TRACE_EVENT(ext4_mballoc_prealloc,
        ),
 
        TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->orig_group, __entry->orig_start,
                  __entry->orig_len, __entry->orig_logical,
@@ -946,8 +990,7 @@ DECLARE_EVENT_CLASS(ext4__mballoc,
        TP_ARGS(sb, inode, group, start, len),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(          int,  result_start            )
                __field(        __u32,  result_group            )
@@ -955,8 +998,7 @@ DECLARE_EVENT_CLASS(ext4__mballoc,
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(sb->s_dev);
-               __entry->dev_minor      = MINOR(sb->s_dev);
+               __entry->dev            = sb->s_dev;
                __entry->ino            = inode ? inode->i_ino : 0;
                __entry->result_start   = start;
                __entry->result_group   = group;
@@ -964,7 +1006,7 @@ DECLARE_EVENT_CLASS(ext4__mballoc,
        ),
 
        TP_printk("dev %d,%d inode %lu extent %u/%d/%u ",
-                 __entry->dev_major, __entry->dev_minor,
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->result_group, __entry->result_start,
                  __entry->result_len)
@@ -998,8 +1040,7 @@ TRACE_EVENT(ext4_forget,
        TP_ARGS(inode, is_metadata, block),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        umode_t, mode                   )
                __field(        int,    is_metadata             )
@@ -1007,8 +1048,7 @@ TRACE_EVENT(ext4_forget,
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->mode   = inode->i_mode;
                __entry->is_metadata = is_metadata;
@@ -1016,9 +1056,9 @@ TRACE_EVENT(ext4_forget,
        ),
 
        TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu",
-                 __entry->dev_major, __entry->dev_minor,
-                 (unsigned long) __entry->ino, __entry->mode,
-                 __entry->is_metadata, __entry->block)
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->mode, __entry->is_metadata, __entry->block)
 );
 
 TRACE_EVENT(ext4_da_update_reserve_space,
@@ -1027,8 +1067,7 @@ TRACE_EVENT(ext4_da_update_reserve_space,
        TP_ARGS(inode, used_blocks),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        umode_t, mode                   )
                __field(        __u64,  i_blocks                )
@@ -1039,8 +1078,7 @@ TRACE_EVENT(ext4_da_update_reserve_space,
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->mode   = inode->i_mode;
                __entry->i_blocks = inode->i_blocks;
@@ -1050,10 +1088,12 @@ TRACE_EVENT(ext4_da_update_reserve_space,
                __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
        ),
 
-       TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
-                 __entry->dev_major, __entry->dev_minor,
-                 (unsigned long) __entry->ino, __entry->mode,
-                 (unsigned long long) __entry->i_blocks,
+       TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d "
+                 "reserved_data_blocks %d reserved_meta_blocks %d "
+                 "allocated_meta_blocks %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->mode, (unsigned long long) __entry->i_blocks,
                  __entry->used_blocks, __entry->reserved_data_blocks,
                  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
 );
@@ -1064,8 +1104,7 @@ TRACE_EVENT(ext4_da_reserve_space,
        TP_ARGS(inode, md_needed),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        umode_t, mode                   )
                __field(        __u64,  i_blocks                )
@@ -1075,8 +1114,7 @@ TRACE_EVENT(ext4_da_reserve_space,
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->mode   = inode->i_mode;
                __entry->i_blocks = inode->i_blocks;
@@ -1085,8 +1123,9 @@ TRACE_EVENT(ext4_da_reserve_space,
                __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
        ),
 
-       TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d",
-                 __entry->dev_major, __entry->dev_minor,
+       TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d "
+                 "reserved_data_blocks %d reserved_meta_blocks %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->mode, (unsigned long long) __entry->i_blocks,
                  __entry->md_needed, __entry->reserved_data_blocks,
@@ -1099,8 +1138,7 @@ TRACE_EVENT(ext4_da_release_space,
        TP_ARGS(inode, freed_blocks),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
                __field(        umode_t, mode                   )
                __field(        __u64,  i_blocks                )
@@ -1111,8 +1149,7 @@ TRACE_EVENT(ext4_da_release_space,
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
                __entry->mode   = inode->i_mode;
                __entry->i_blocks = inode->i_blocks;
@@ -1122,8 +1159,10 @@ TRACE_EVENT(ext4_da_release_space,
                __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
        ),
 
-       TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
-                 __entry->dev_major, __entry->dev_minor,
+       TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d "
+                 "reserved_data_blocks %d reserved_meta_blocks %d "
+                 "allocated_meta_blocks %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino,
                  __entry->mode, (unsigned long long) __entry->i_blocks,
                  __entry->freed_blocks, __entry->reserved_data_blocks,
@@ -1136,20 +1175,19 @@ DECLARE_EVENT_CLASS(ext4__bitmap_load,
        TP_ARGS(sb, group),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        __u32,  group                   )
 
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(sb->s_dev);
-               __entry->dev_minor = MINOR(sb->s_dev);
+               __entry->dev    = sb->s_dev;
                __entry->group  = group;
        ),
 
        TP_printk("dev %d,%d group %u",
-                 __entry->dev_major, __entry->dev_minor, __entry->group)
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->group)
 );
 
 DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load,
@@ -1166,6 +1204,349 @@ DEFINE_EVENT(ext4__bitmap_load, ext4_mb_buddy_bitmap_load,
        TP_ARGS(sb, group)
 );
 
+DEFINE_EVENT(ext4__bitmap_load, ext4_read_block_bitmap_load,
+
+       TP_PROTO(struct super_block *sb, unsigned long group),
+
+       TP_ARGS(sb, group)
+);
+
+DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap,
+
+       TP_PROTO(struct super_block *sb, unsigned long group),
+
+       TP_ARGS(sb, group)
+);
+
+TRACE_EVENT(ext4_direct_IO_enter,
+       TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
+
+       TP_ARGS(inode, offset, len, rw),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+               __field(        loff_t, pos                     )
+               __field(        unsigned long,  len             )
+               __field(        int,    rw                      )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->pos    = offset;
+               __entry->len    = len;
+               __entry->rw     = rw;
+       ),
+
+       TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos, __entry->len, __entry->rw)
+);
+
+TRACE_EVENT(ext4_direct_IO_exit,
+       TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw, int ret),
+
+       TP_ARGS(inode, offset, len, rw, ret),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+               __field(        loff_t, pos                     )
+               __field(        unsigned long,  len             )
+               __field(        int,    rw                      )
+               __field(        int,    ret                     )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->pos    = offset;
+               __entry->len    = len;
+               __entry->rw     = rw;
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos, __entry->len,
+                 __entry->rw, __entry->ret)
+);
+
+TRACE_EVENT(ext4_fallocate_enter,
+       TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode),
+
+       TP_ARGS(inode, offset, len, mode),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+               __field(        loff_t, pos                     )
+               __field(        loff_t, len                     )
+               __field(        int,    mode                    )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->pos    = offset;
+               __entry->len    = len;
+               __entry->mode   = mode;
+       ),
+
+       TP_printk("dev %d,%d ino %ld pos %llu len %llu mode %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos,
+                 (unsigned long long) __entry->len, __entry->mode)
+);
+
+TRACE_EVENT(ext4_fallocate_exit,
+       TP_PROTO(struct inode *inode, loff_t offset, unsigned int max_blocks, int ret),
+
+       TP_ARGS(inode, offset, max_blocks, ret),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+               __field(        loff_t, pos                     )
+               __field(        unsigned,       blocks          )
+               __field(        int,    ret                     )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->pos    = offset;
+               __entry->blocks = max_blocks;
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("dev %d,%d ino %ld pos %llu blocks %d ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos, __entry->blocks,
+                 __entry->ret)
+);
+
+TRACE_EVENT(ext4_unlink_enter,
+       TP_PROTO(struct inode *parent, struct dentry *dentry),
+
+       TP_ARGS(parent, dentry),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  parent                  )
+               __field(        ino_t,  ino                     )
+               __field(        loff_t, size                    )
+               __field(        dev_t,  dev                     )
+       ),
+
+       TP_fast_assign(
+               __entry->parent         = parent->i_ino;
+               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->size           = dentry->d_inode->i_size;
+               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %ld size %lld parent %ld",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, __entry->size,
+                 (unsigned long) __entry->parent)
+);
+
+TRACE_EVENT(ext4_unlink_exit,
+       TP_PROTO(struct dentry *dentry, int ret),
+
+       TP_ARGS(dentry, ret),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+               __field(        int,    ret                     )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+               __entry->ret            = ret;
+       ),
+
+       TP_printk("dev %d,%d ino %ld ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->ret)
+);
+
+DECLARE_EVENT_CLASS(ext4__truncate,
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        blkcnt_t,       blocks          )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->blocks = inode->i_blocks;
+       ),
+
+       TP_printk("dev %d,%d ino %lu blocks %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, (unsigned long) __entry->blocks)
+);
+
+DEFINE_EVENT(ext4__truncate, ext4_truncate_enter,
+
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode)
+);
+
+DEFINE_EVENT(ext4__truncate, ext4_truncate_exit,
+
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode)
+);
+
+DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+                unsigned len, unsigned flags),
+
+       TP_ARGS(inode, lblk, len, flags),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        ext4_lblk_t,    lblk            )
+               __field(        unsigned,       len             )
+               __field(        unsigned,       flags           )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->lblk   = lblk;
+               __entry->len    = len;
+               __entry->flags  = flags;
+       ),
+
+       TP_printk("dev %d,%d ino %lu lblk %u len %u flags %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->lblk, __entry->len, __entry->flags)
+);
+
+DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+                unsigned len, unsigned flags),
+
+       TP_ARGS(inode, lblk, len, flags)
+);
+
+DEFINE_EVENT(ext4__map_blocks_enter, ext4_ind_map_blocks_enter,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+                unsigned len, unsigned flags),
+
+       TP_ARGS(inode, lblk, len, flags)
+);
+
+DECLARE_EVENT_CLASS(ext4__map_blocks_exit,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+                ext4_fsblk_t pblk, unsigned len, int ret),
+
+       TP_ARGS(inode, lblk, pblk, len, ret),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        ext4_lblk_t,    lblk            )
+               __field(        ext4_fsblk_t,   pblk            )
+               __field(        unsigned,       len             )
+               __field(        int,            ret             )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->lblk   = lblk;
+               __entry->pblk   = pblk;
+               __entry->len    = len;
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->lblk, (unsigned long long) __entry->pblk,
+                 __entry->len, __entry->ret)
+);
+
+DEFINE_EVENT(ext4__map_blocks_exit, ext4_ext_map_blocks_exit,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+                ext4_fsblk_t pblk, unsigned len, int ret),
+
+       TP_ARGS(inode, lblk, pblk, len, ret)
+);
+
+DEFINE_EVENT(ext4__map_blocks_exit, ext4_ind_map_blocks_exit,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+                ext4_fsblk_t pblk, unsigned len, int ret),
+
+       TP_ARGS(inode, lblk, pblk, len, ret)
+);
+
+TRACE_EVENT(ext4_ext_load_extent,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk),
+
+       TP_ARGS(inode, lblk, pblk),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        ext4_lblk_t,    lblk            )
+               __field(        ext4_fsblk_t,   pblk            )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->lblk   = lblk;
+               __entry->pblk   = pblk;
+       ),
+
+       TP_printk("dev %d,%d ino %lu lblk %u pblk %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned) __entry->lblk, (unsigned long long) __entry->pblk)
+);
+
+TRACE_EVENT(ext4_load_inode,
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino             )
+               __field(        dev_t,  dev             )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %ld",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */
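
The ext4 hunks above all apply the same conversion: the pair of int dev_major/dev_minor fields is collapsed into a single dev_t stored in the trace ring buffer, and the split into major/minor numbers is deferred to TP_printk(), which only runs when the trace is read. A minimal sketch of that pattern, assuming the usual TRACE_EVENT infrastructure and using a hypothetical event name (ext4_sample_event) purely for illustration:

TRACE_EVENT(ext4_sample_event,
        TP_PROTO(struct inode *inode),

        TP_ARGS(inode),

        TP_STRUCT__entry(
                __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
        ),

        TP_fast_assign(
                /* one dev_t in the trace buffer instead of two ints */
                __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
        ),

        /* decode major/minor only at output time */
        TP_printk("dev %d,%d ino %lu",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long) __entry->ino)
);

Once built in, the new ext4 events appear under /sys/kernel/debug/tracing/events/ext4/ and can be enabled individually like any other tracepoint.
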
index 7447ea9305b54eeece947d812a1c87cd3a58ba1c..bf16545cc97756d263305f17712f522b979bbf32 100644 (file)
@@ -17,19 +17,17 @@ TRACE_EVENT(jbd2_checkpoint,
        TP_ARGS(journal, result),
 
        TP_STRUCT__entry(
-               __field(        int,    dev_major               )
-               __field(        int,    dev_minor               )
+               __field(        dev_t,  dev                     )
                __field(        int,    result                  )
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(journal->j_fs_dev->bd_dev);
-               __entry->dev_minor      = MINOR(journal->j_fs_dev->bd_dev);
+               __entry->dev            = journal->j_fs_dev->bd_dev;
                __entry->result         = result;
        ),
 
-       TP_printk("dev %d,%d result %d",
-                 __entry->dev_major, __entry->dev_minor, __entry->result)
+       TP_printk("dev %s result %d",
+                 jbd2_dev_to_name(__entry->dev), __entry->result)
 );
 
 DECLARE_EVENT_CLASS(jbd2_commit,
@@ -39,22 +37,20 @@ DECLARE_EVENT_CLASS(jbd2_commit,
        TP_ARGS(journal, commit_transaction),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        char,   sync_commit               )
                __field(        int,    transaction               )
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(journal->j_fs_dev->bd_dev);
-               __entry->dev_minor      = MINOR(journal->j_fs_dev->bd_dev);
+               __entry->dev            = journal->j_fs_dev->bd_dev;
                __entry->sync_commit = commit_transaction->t_synchronous_commit;
                __entry->transaction    = commit_transaction->t_tid;
        ),
 
-       TP_printk("dev %d,%d transaction %d sync %d",
-                 __entry->dev_major, __entry->dev_minor,
-                 __entry->transaction, __entry->sync_commit)
+       TP_printk("dev %s transaction %d sync %d",
+                 jbd2_dev_to_name(__entry->dev), __entry->transaction,
+                 __entry->sync_commit)
 );
 
 DEFINE_EVENT(jbd2_commit, jbd2_start_commit,
@@ -91,24 +87,22 @@ TRACE_EVENT(jbd2_end_commit,
        TP_ARGS(journal, commit_transaction),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        char,   sync_commit               )
                __field(        int,    transaction               )
                __field(        int,    head                      )
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(journal->j_fs_dev->bd_dev);
-               __entry->dev_minor      = MINOR(journal->j_fs_dev->bd_dev);
+               __entry->dev            = journal->j_fs_dev->bd_dev;
                __entry->sync_commit = commit_transaction->t_synchronous_commit;
                __entry->transaction    = commit_transaction->t_tid;
                __entry->head           = journal->j_tail_sequence;
        ),
 
-       TP_printk("dev %d,%d transaction %d sync %d head %d",
-                 __entry->dev_major, __entry->dev_minor,
-                 __entry->transaction, __entry->sync_commit, __entry->head)
+       TP_printk("dev %s transaction %d sync %d head %d",
+                 jbd2_dev_to_name(__entry->dev), __entry->transaction,
+                 __entry->sync_commit, __entry->head)
 );
 
 TRACE_EVENT(jbd2_submit_inode_data,
@@ -117,20 +111,17 @@ TRACE_EVENT(jbd2_submit_inode_data,
        TP_ARGS(inode),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
        ),
 
        TP_fast_assign(
-               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
-               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
        ),
 
-       TP_printk("dev %d,%d ino %lu",
-                 __entry->dev_major, __entry->dev_minor,
-                 (unsigned long) __entry->ino)
+       TP_printk("dev %s ino %lu",
+                 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
 );
 
 TRACE_EVENT(jbd2_run_stats,
@@ -140,8 +131,7 @@ TRACE_EVENT(jbd2_run_stats,
        TP_ARGS(dev, tid, stats),
 
        TP_STRUCT__entry(
-               __field(                  int,  dev_major       )
-               __field(                  int,  dev_minor       )
+               __field(                dev_t,  dev             )
                __field(        unsigned long,  tid             )
                __field(        unsigned long,  wait            )
                __field(        unsigned long,  running         )
@@ -154,8 +144,7 @@ TRACE_EVENT(jbd2_run_stats,
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(dev);
-               __entry->dev_minor      = MINOR(dev);
+               __entry->dev            = dev;
                __entry->tid            = tid;
                __entry->wait           = stats->rs_wait;
                __entry->running        = stats->rs_running;
@@ -167,9 +156,9 @@ TRACE_EVENT(jbd2_run_stats,
                __entry->blocks_logged  = stats->rs_blocks_logged;
        ),
 
-       TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u "
+       TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u "
                  "logging %u handle_count %u blocks %u blocks_logged %u",
-                 __entry->dev_major, __entry->dev_minor, __entry->tid,
+                 jbd2_dev_to_name(__entry->dev), __entry->tid,
                  jiffies_to_msecs(__entry->wait),
                  jiffies_to_msecs(__entry->running),
                  jiffies_to_msecs(__entry->locked),
@@ -186,8 +175,7 @@ TRACE_EVENT(jbd2_checkpoint_stats,
        TP_ARGS(dev, tid, stats),
 
        TP_STRUCT__entry(
-               __field(                  int,  dev_major       )
-               __field(                  int,  dev_minor       )
+               __field(                dev_t,  dev             )
                __field(        unsigned long,  tid             )
                __field(        unsigned long,  chp_time        )
                __field(                __u32,  forced_to_close )
@@ -196,8 +184,7 @@ TRACE_EVENT(jbd2_checkpoint_stats,
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(dev);
-               __entry->dev_minor      = MINOR(dev);
+               __entry->dev            = dev;
                __entry->tid            = tid;
                __entry->chp_time       = stats->cs_chp_time;
                __entry->forced_to_close= stats->cs_forced_to_close;
@@ -205,9 +192,9 @@ TRACE_EVENT(jbd2_checkpoint_stats,
                __entry->dropped        = stats->cs_dropped;
        ),
 
-       TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u "
+       TP_printk("dev %s tid %lu chp_time %u forced_to_close %u "
                  "written %u dropped %u",
-                 __entry->dev_major, __entry->dev_minor, __entry->tid,
+                 jbd2_dev_to_name(__entry->dev), __entry->tid,
                  jiffies_to_msecs(__entry->chp_time),
                  __entry->forced_to_close, __entry->written, __entry->dropped)
 );
@@ -220,8 +207,7 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
        TP_ARGS(journal, first_tid, block_nr, freed),
 
        TP_STRUCT__entry(
-               __field(        int,   dev_major                )
-               __field(        int,   dev_minor                )
+               __field(        dev_t,  dev                     )
                __field(        tid_t,  tail_sequence           )
                __field(        tid_t,  first_tid               )
                __field(unsigned long,  block_nr                )
@@ -229,18 +215,16 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
        ),
 
        TP_fast_assign(
-               __entry->dev_major      = MAJOR(journal->j_fs_dev->bd_dev);
-               __entry->dev_minor      = MINOR(journal->j_fs_dev->bd_dev);
+               __entry->dev            = journal->j_fs_dev->bd_dev;
                __entry->tail_sequence  = journal->j_tail_sequence;
                __entry->first_tid      = first_tid;
                __entry->block_nr       = block_nr;
                __entry->freed          = freed;
        ),
 
-       TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
-                 __entry->dev_major, __entry->dev_minor,
-                 __entry->tail_sequence, __entry->first_tid,
-                 __entry->block_nr, __entry->freed)
+       TP_printk("dev %s from %u to %u offset %lu freed %lu",
+                 jbd2_dev_to_name(__entry->dev), __entry->tail_sequence,
+                 __entry->first_tid, __entry->block_nr, __entry->freed)
 );
 
 #endif /* _TRACE_JBD2_H */
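
The jbd2 events take a slightly different route: they also switch to storing a raw dev_t, but format it with jbd2_dev_to_name() at print time (as the hunks above show) rather than with MAJOR()/MINOR(). A minimal sketch under the same assumptions, again with a hypothetical event name (jbd2_sample_event):

TRACE_EVENT(jbd2_sample_event,
        TP_PROTO(journal_t *journal, int result),

        TP_ARGS(journal, result),

        TP_STRUCT__entry(
                __field(        dev_t,  dev                     )
                __field(        int,    result                  )
        ),

        TP_fast_assign(
                /* record the journal's backing device as a single dev_t */
                __entry->dev    = journal->j_fs_dev->bd_dev;
                __entry->result = result;
        ),

        /* resolve the device name only when the event is printed */
        TP_printk("dev %s result %d",
                  jbd2_dev_to_name(__entry->dev), __entry->result)
);
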