Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 10 Dec 2009 17:33:29 +0000 (09:33 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 10 Dec 2009 17:33:29 +0000 (09:33 -0800)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (47 commits)
  ext4: Fix potential fiemap deadlock (mmap_sem vs. i_data_sem)
  ext4: Do not override ext2 or ext3 if built they are built as modules
  jbd2: Export jbd2_log_start_commit to fix ext4 build
  ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT
  ext4: Wait for proper transaction commit on fsync
  ext4: fix incorrect block reservation on quota transfer.
  ext4: quota macros cleanup
  ext4: ext4_get_reserved_space() must return bytes instead of blocks
  ext4: remove blocks from inode prealloc list on failure
  ext4: wait for log to commit when umounting
  ext4: Avoid data / filesystem corruption when write fails to copy data
  ext4: Use ext4 file system driver for ext2/ext3 file system mounts
  ext4: Return the PTR_ERR of the correct pointer in setup_new_group_blocks()
  jbd2: Add ENOMEM checking in and for jbd2_journal_write_metadata_buffer()
  ext4: remove unused parameter wbc from __ext4_journalled_writepage()
  ext4: remove encountered_congestion trace
  ext4: move_extent_per_page() cleanup
  ext4: initialize moved_len before calling ext4_move_extents()
  ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT
  ext4: use ext4_data_block_valid() in ext4_free_blocks()
  ...

21 files changed:
Documentation/filesystems/ext4.txt
fs/ext4/Kconfig
fs/ext4/balloc.c
fs/ext4/block_validity.c
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.c
fs/ext4/ext4_jbd2.h
fs/ext4/extents.c
fs/ext4/fsync.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/migrate.c
fs/ext4/move_extent.c
fs/ext4/namei.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/jbd2/commit.c
fs/jbd2/journal.c
include/trace/events/ext4.h

index 6d94e0696f8c313ee5a439f5ed2f140b356500f6..af6885c3c821f621f5827a46cb0728c61a9e768c 100644 (file)
@@ -153,8 +153,8 @@ journal_dev=devnum  When the external journal device's major/minor numbers
                        identified through its new major/minor numbers encoded
                        in devnum.
 
-noload                 Don't load the journal on mounting.  Note that
-                       if the filesystem was not unmounted cleanly,
+norecovery             Don't load the journal on mounting.  Note that
+noload                 if the filesystem was not unmounted cleanly,
                        skipping the journal replay will lead to the
                        filesystem containing inconsistencies that can
                        lead to any number of problems.
@@ -353,6 +353,12 @@ noauto_da_alloc            replacing existing files via patterns such as
                        system crashes before the delayed allocation
                        blocks are forced to disk.
 
+discard                Controls whether ext4 should issue discard/TRIM
+nodiscard(*)           commands to the underlying block device when
+                       blocks are freed.  This is useful for SSD devices
+                       and sparse/thinly-provisioned LUNs, but it is off
+                       by default until sufficient testing has been done.
+
 Data Mode
 =========
 There are 3 different data modes:
index 9f2d45d75b1a70084e5f153c94d49a2b5f863321..9acf7e808139d3df688da7d66ffb33b5cf081d8e 100644 (file)
@@ -26,6 +26,16 @@ config EXT4_FS
 
          If unsure, say N.
 
+config EXT4_USE_FOR_EXT23
+       bool "Use ext4 for ext2/ext3 file systems"
+       depends on EXT3_FS=n || EXT2_FS=n
+       default y
+       help
+         Allow the ext4 file system driver code to be used for ext2 or
+         ext3 file system mounts.  This allows users to reduce their
+         compiled kernel size by using one file system driver for
+         ext2, ext3, and ext4 file systems.
+
 config EXT4_FS_XATTR
        bool "Ext4 extended attributes"
        depends on EXT4_FS
index 1d0418980f8db58e4ce34a3c47e00ee8bcb53d01..22bc7435d9134b397f537440faa15ce890b7c44f 100644 (file)
@@ -498,44 +498,6 @@ error_return:
        return;
 }
 
-/**
- * ext4_free_blocks() -- Free given blocks and update quota
- * @handle:            handle for this transaction
- * @inode:             inode
- * @block:             start physical block to free
- * @count:             number of blocks to count
- * @metadata:          Are these metadata blocks
- */
-void ext4_free_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t block, unsigned long count,
-                       int metadata)
-{
-       struct super_block *sb;
-       unsigned long dquot_freed_blocks;
-
-       /* this isn't the right place to decide whether block is metadata
-        * inode.c/extents.c knows better, but for safety ... */
-       if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-               metadata = 1;
-
-       /* We need to make sure we don't reuse
-        * block released untill the transaction commit.
-        * writeback mode have weak data consistency so
-        * don't force data as metadata when freeing block
-        * for writeback mode.
-        */
-       if (metadata == 0 && !ext4_should_writeback_data(inode))
-               metadata = 1;
-
-       sb = inode->i_sb;
-
-       ext4_mb_free_blocks(handle, inode, block, count,
-                           metadata, &dquot_freed_blocks);
-       if (dquot_freed_blocks)
-               vfs_dq_free_block(inode, dquot_freed_blocks);
-       return;
-}
-
 /**
  * ext4_has_free_blocks()
  * @sbi:       in-core super block structure.
@@ -761,7 +723,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
 static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
                                        ext4_group_t group)
 {
-       return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0;
+       if (!ext4_bg_has_super(sb, group))
+               return 0;
+
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
+               return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
+       else
+               return EXT4_SB(sb)->s_gdb_count;
 }
 
 /**
index 50784ef0756311b2958686fdbf3c60e1c7fe04fa..4df8621ec31cff1f5c8eef0211942750b92fd009 100644 (file)
@@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb)
                if (ext4_bg_has_super(sb, i) &&
                    ((i < 5) || ((i % flex_size) == 0)))
                        add_system_zone(sbi, ext4_group_first_block_no(sb, i),
-                                       sbi->s_gdb_count + 1);
+                                       ext4_bg_num_gdb(sb, i) + 1);
                gdp = ext4_get_group_desc(sb, i, NULL);
                ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
                if (ret)
@@ -228,6 +228,7 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
        struct rb_node *n = sbi->system_blks.rb_node;
 
        if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+           (start_blk + count < start_blk) ||
            (start_blk + count > ext4_blocks_count(sbi->s_es)))
                return 0;
        while (n) {
index 8825515eeddd8042881365c6e84a54fba9c6c2f3..ab31e65d46d017880644a540e23f05bca5398a92 100644 (file)
@@ -375,6 +375,12 @@ struct ext4_new_group_data {
 #define EXT4_GET_BLOCKS_DIO_CONVERT_EXT                (EXT4_GET_BLOCKS_CONVERT|\
                                         EXT4_GET_BLOCKS_DIO_CREATE_EXT)
 
+/*
+ * Flags used by ext4_free_blocks
+ */
+#define EXT4_FREE_BLOCKS_METADATA      0x0001
+#define EXT4_FREE_BLOCKS_FORGET                0x0002
+
 /*
  * ioctl commands
  */
@@ -703,6 +709,13 @@ struct ext4_inode_info {
        struct list_head i_aio_dio_complete_list;
        /* current io_end structure for async DIO write*/
        ext4_io_end_t *cur_aio_dio;
+
+       /*
+        * Transactions that contain inode's metadata needed to complete
+        * fsync and fdatasync, respectively.
+        */
+       tid_t i_sync_tid;
+       tid_t i_datasync_tid;
 };
 
 /*
@@ -750,6 +763,7 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DELALLOC            0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT      0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY      0x20000000 /* Block validity checking */
+#define EXT4_MOUNT_DISCARD             0x40000000 /* Issue DISCARD requests */
 
 #define clear_opt(o, opt)              o &= ~EXT4_MOUNT_##opt
 #define set_opt(o, opt)                        o |= EXT4_MOUNT_##opt
@@ -1324,8 +1338,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, unsigned long *count, int *errp);
 extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
 extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
-extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t block, unsigned long count, int metadata);
 extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
                                ext4_fsblk_t block, unsigned long count);
 extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1384,16 +1396,15 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int);
 extern void ext4_discard_preallocations(struct inode *);
 extern int __init init_ext4_mballoc(void);
 extern void exit_ext4_mballoc(void);
-extern void ext4_mb_free_blocks(handle_t *, struct inode *,
-               ext4_fsblk_t, unsigned long, int, unsigned long *);
+extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
+                            struct buffer_head *bh, ext4_fsblk_t block,
+                            unsigned long count, int flags);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
                ext4_group_t i, struct ext4_group_desc *desc);
 extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
 extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
                                                ext4_group_t, int);
 /* inode.c */
-int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
-               struct buffer_head *bh, ext4_fsblk_t blocknr);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
                                                ext4_lblk_t, int, int *);
 struct buffer_head *ext4_bread(handle_t *, struct inode *,
index 6a9409920deef2f6040b8d7279114cfc0612221d..b57e5c711b6de1048b98afb238fbdca874134497 100644 (file)
@@ -4,6 +4,8 @@
 
 #include "ext4_jbd2.h"
 
+#include <trace/events/ext4.h>
+
 int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
                                struct buffer_head *bh)
 {
@@ -32,35 +34,69 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
        return err;
 }
 
-int __ext4_journal_forget(const char *where, handle_t *handle,
-                               struct buffer_head *bh)
+/*
+ * The ext4 forget function must perform a revoke if we are freeing data
+ * which has been journaled.  Metadata (eg. indirect blocks) must be
+ * revoked in all cases.
+ *
+ * "bh" may be NULL: a metadata block may have been freed from memory
+ * but there may still be a record of it in the journal, and that record
+ * still needs to be revoked.
+ *
+ * If the handle isn't valid we're not journaling, but we still need to
+ * call into ext4_journal_revoke() to put the buffer head.
+ */
+int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
+                 struct inode *inode, struct buffer_head *bh,
+                 ext4_fsblk_t blocknr)
 {
-       int err = 0;
+       int err;
 
-       if (ext4_handle_valid(handle)) {
-               err = jbd2_journal_forget(handle, bh);
-               if (err)
-                       ext4_journal_abort_handle(where, __func__, bh,
-                                                 handle, err);
-       }
-       else
+       might_sleep();
+
+       trace_ext4_forget(inode, is_metadata, blocknr);
+       BUFFER_TRACE(bh, "enter");
+
+       jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
+                 "data mode %x\n",
+                 bh, is_metadata, inode->i_mode,
+                 test_opt(inode->i_sb, DATA_FLAGS));
+
+       /* In the no journal case, we can just do a bforget and return */
+       if (!ext4_handle_valid(handle)) {
                bforget(bh);
-       return err;
-}
+               return 0;
+       }
 
-int __ext4_journal_revoke(const char *where, handle_t *handle,
-                               ext4_fsblk_t blocknr, struct buffer_head *bh)
-{
-       int err = 0;
+       /* Never use the revoke function if we are doing full data
+        * journaling: there is no need to, and a V1 superblock won't
+        * support it.  Otherwise, only skip the revoke on un-journaled
+        * data blocks. */
 
-       if (ext4_handle_valid(handle)) {
-               err = jbd2_journal_revoke(handle, blocknr, bh);
-               if (err)
-                       ext4_journal_abort_handle(where, __func__, bh,
-                                                 handle, err);
+       if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
+           (!is_metadata && !ext4_should_journal_data(inode))) {
+               if (bh) {
+                       BUFFER_TRACE(bh, "call jbd2_journal_forget");
+                       err = jbd2_journal_forget(handle, bh);
+                       if (err)
+                               ext4_journal_abort_handle(where, __func__, bh,
+                                                         handle, err);
+                       return err;
+               }
+               return 0;
        }
-       else
-               bforget(bh);
+
+       /*
+        * data!=journal && (is_metadata || should_journal_data(inode))
+        */
+       BUFFER_TRACE(bh, "call jbd2_journal_revoke");
+       err = jbd2_journal_revoke(handle, blocknr, bh);
+       if (err) {
+               ext4_journal_abort_handle(where, __func__, bh, handle, err);
+               ext4_abort(inode->i_sb, __func__,
+                          "error %d when attempting revoke", err);
+       }
+       BUFFER_TRACE(bh, "exit");
        return err;
 }
 
index a2865980342f90c21aed6d731fcf1d66d72e0312..05eca817d7046258f54d3a8715dafb946ff5e155 100644 (file)
@@ -49,7 +49,7 @@
 
 #define EXT4_DATA_TRANS_BLOCKS(sb)     (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
                                         EXT4_XATTR_TRANS_BLOCKS - 2 + \
-                                        2*EXT4_QUOTA_TRANS_BLOCKS(sb))
+                                        EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
 
 /*
  * Define the number of metadata blocks we need to account to modify data.
@@ -57,7 +57,7 @@
  * This include super block, inode block, quota blocks and xattr blocks
  */
 #define EXT4_META_TRANS_BLOCKS(sb)     (EXT4_XATTR_TRANS_BLOCKS + \
-                                       2*EXT4_QUOTA_TRANS_BLOCKS(sb))
+                                       EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
 
 /* Delete operations potentially hit one directory's namespace plus an
  * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
@@ -92,6 +92,7 @@
  * but inode, sb and group updates are done only once */
 #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
                (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
+
 #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
                (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
 #else
 #define EXT4_QUOTA_INIT_BLOCKS(sb) 0
 #define EXT4_QUOTA_DEL_BLOCKS(sb) 0
 #endif
+#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
+#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
+#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
 
 int
 ext4_mark_iloc_dirty(handle_t *handle,
@@ -116,12 +120,8 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
 int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
 
 /*
- * Wrapper functions with which ext4 calls into JBD.  The intent here is
- * to allow these to be turned into appropriate stubs so ext4 can control
- * ext2 filesystems, so ext2+ext4 systems only nee one fs.  This work hasn't
- * been done yet.
+ * Wrapper functions with which ext4 calls into JBD.
  */
-
 void ext4_journal_abort_handle(const char *caller, const char *err_fn,
                struct buffer_head *bh, handle_t *handle, int err);
 
@@ -131,13 +131,9 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
 int __ext4_journal_get_write_access(const char *where, handle_t *handle,
                                struct buffer_head *bh);
 
-/* When called with an invalid handle, this will still do a put on the BH */
-int __ext4_journal_forget(const char *where, handle_t *handle,
-                               struct buffer_head *bh);
-
-/* When called with an invalid handle, this will still do a put on the BH */
-int __ext4_journal_revoke(const char *where, handle_t *handle,
-                               ext4_fsblk_t blocknr, struct buffer_head *bh);
+int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
+                 struct inode *inode, struct buffer_head *bh,
+                 ext4_fsblk_t blocknr);
 
 int __ext4_journal_get_create_access(const char *where,
                                handle_t *handle, struct buffer_head *bh);
@@ -149,12 +145,11 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
        __ext4_journal_get_undo_access(__func__, (handle), (bh))
 #define ext4_journal_get_write_access(handle, bh) \
        __ext4_journal_get_write_access(__func__, (handle), (bh))
-#define ext4_journal_revoke(handle, blocknr, bh) \
-       __ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
+#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
+       __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\
+                     (block_nr))
 #define ext4_journal_get_create_access(handle, bh) \
        __ext4_journal_get_create_access(__func__, (handle), (bh))
-#define ext4_journal_forget(handle, bh) \
-       __ext4_journal_forget(__func__, (handle), (bh))
 #define ext4_handle_dirty_metadata(handle, inode, bh) \
        __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))
 
@@ -254,6 +249,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
        return 0;
 }
 
+static inline void ext4_update_inode_fsync_trans(handle_t *handle,
+                                                struct inode *inode,
+                                                int datasync)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+
+       if (ext4_handle_valid(handle)) {
+               ei->i_sync_tid = handle->h_transaction->t_tid;
+               if (datasync)
+                       ei->i_datasync_tid = handle->h_transaction->t_tid;
+       }
+}
+
 /* super.c */
 int ext4_force_commit(struct super_block *sb);
 
index 715264b4bae4b421c110af69ed3ccf8dc32bb1f6..3a7928f825e45f8fb244587cbe7c8921f6a3269b 100644 (file)
@@ -1007,7 +1007,8 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
-                       ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
+                       ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+                                        EXT4_FREE_BLOCKS_METADATA);
                }
        }
        kfree(ablocks);
@@ -1761,7 +1762,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
        while (block < last && block != EXT_MAX_BLOCK) {
                num = last - block;
                /* find extent for this block */
+               down_read(&EXT4_I(inode)->i_data_sem);
                path = ext4_ext_find_extent(inode, block, path);
+               up_read(&EXT4_I(inode)->i_data_sem);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        path = NULL;
@@ -1957,7 +1960,6 @@ errout:
 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path)
 {
-       struct buffer_head *bh;
        int err;
        ext4_fsblk_t leaf;
 
@@ -1973,9 +1975,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
        if (err)
                return err;
        ext_debug("index is empty, remove it, free block %llu\n", leaf);
-       bh = sb_find_get_block(inode->i_sb, leaf);
-       ext4_forget(handle, 1, inode, bh, leaf);
-       ext4_free_blocks(handle, inode, leaf, 1, 1);
+       ext4_free_blocks(handle, inode, 0, leaf, 1,
+                        EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return err;
 }
 
@@ -2042,12 +2043,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
                                struct ext4_extent *ex,
                                ext4_lblk_t from, ext4_lblk_t to)
 {
-       struct buffer_head *bh;
        unsigned short ee_len =  ext4_ext_get_actual_len(ex);
-       int i, metadata = 0;
+       int flags = EXT4_FREE_BLOCKS_FORGET;
 
        if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-               metadata = 1;
+               flags |= EXT4_FREE_BLOCKS_METADATA;
 #ifdef EXTENTS_STATS
        {
                struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2072,11 +2072,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
                num = le32_to_cpu(ex->ee_block) + ee_len - from;
                start = ext_pblock(ex) + ee_len - num;
                ext_debug("free last %u blocks starting %llu\n", num, start);
-               for (i = 0; i < num; i++) {
-                       bh = sb_find_get_block(inode->i_sb, start + i);
-                       ext4_forget(handle, 0, inode, bh, start + i);
-               }
-               ext4_free_blocks(handle, inode, start, num, metadata);
+               ext4_free_blocks(handle, inode, 0, start, num, flags);
        } else if (from == le32_to_cpu(ex->ee_block)
                   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
                printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2167,7 +2163,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                        correct_index = 1;
                        credits += (ext_depth(inode)) + 1;
                }
-               credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
+               credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 
                err = ext4_ext_truncate_extend_restart(handle, inode, credits);
                if (err)
@@ -3064,6 +3060,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
        if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
                ret = ext4_convert_unwritten_extents_dio(handle, inode,
                                                        path);
+               if (ret >= 0)
+                       ext4_update_inode_fsync_trans(handle, inode, 1);
                goto out2;
        }
        /* buffered IO case */
@@ -3091,6 +3089,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
        ret = ext4_ext_convert_to_initialized(handle, inode,
                                                path, iblock,
                                                max_blocks);
+       if (ret >= 0)
+               ext4_update_inode_fsync_trans(handle, inode, 1);
 out:
        if (ret <= 0) {
                err = ret;
@@ -3319,8 +3319,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                /* not a good idea to call discard here directly,
                 * but otherwise we'd need to call it every free() */
                ext4_discard_preallocations(inode);
-               ext4_free_blocks(handle, inode, ext_pblock(&newex),
-                                       ext4_ext_get_actual_len(&newex), 0);
+               ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
+                                ext4_ext_get_actual_len(&newex), 0);
                goto out2;
        }
 
@@ -3329,10 +3329,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        allocated = ext4_ext_get_actual_len(&newex);
        set_buffer_new(bh_result);
 
-       /* Cache only when it is _not_ an uninitialized extent */
-       if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
+       /*
+        * Cache the extent and update transaction to commit on fdatasync only
+        * when it is _not_ an uninitialized extent.
+        */
+       if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
                ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
                                                EXT4_EXT_CACHE_EXTENT);
+               ext4_update_inode_fsync_trans(handle, inode, 1);
+       } else
+               ext4_update_inode_fsync_trans(handle, inode, 0);
 out:
        if (allocated > max_blocks)
                allocated = max_blocks;
@@ -3720,10 +3726,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 * Walk the extent tree gathering extent information.
                 * ext4_ext_fiemap_cb will push extents back to user.
                 */
-               down_read(&EXT4_I(inode)->i_data_sem);
                error = ext4_ext_walk_space(inode, start_blk, len_blks,
                                          ext4_ext_fiemap_cb, fieinfo);
-               up_read(&EXT4_I(inode)->i_data_sem);
        }
 
        return error;
index 2b1531266ee25010776dcebe8921103da29d9c3a..0b22497d92e1b079e2eb10c22eb7a6ce6f546153 100644 (file)
 int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 {
        struct inode *inode = dentry->d_inode;
+       struct ext4_inode_info *ei = EXT4_I(inode);
        journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
-       int err, ret = 0;
+       int ret;
+       tid_t commit_tid;
 
        J_ASSERT(ext4_journal_current_handle() == NULL);
 
        trace_ext4_sync_file(file, dentry, datasync);
 
+       if (inode->i_sb->s_flags & MS_RDONLY)
+               return 0;
+
        ret = flush_aio_dio_completed_IO(inode);
        if (ret < 0)
-               goto out;
+               return ret;
+       
+       if (!journal)
+               return simple_fsync(file, dentry, datasync);
+
        /*
-        * data=writeback:
+        * data=writeback,ordered:
         *  The caller's filemap_fdatawrite()/wait will sync the data.
-        *  sync_inode() will sync the metadata
-        *
-        * data=ordered:
-        *  The caller's filemap_fdatawrite() will write the data and
-        *  sync_inode() will write the inode if it is dirty.  Then the caller's
-        *  filemap_fdatawait() will wait on the pages.
+        *  Metadata is in the journal, we wait for proper transaction to
+        *  commit here.
         *
         * data=journal:
         *  filemap_fdatawrite won't do anything (the buffers are clean).
@@ -79,32 +84,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
         *  (they were dirtied by commit).  But that's OK - the blocks are
         *  safe in-journal, which is all fsync() needs to ensure.
         */
-       if (ext4_should_journal_data(inode)) {
-               ret = ext4_force_commit(inode->i_sb);
-               goto out;
-       }
+       if (ext4_should_journal_data(inode))
+               return ext4_force_commit(inode->i_sb);
 
-       if (!journal)
-               ret = sync_mapping_buffers(inode->i_mapping);
-
-       if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-               goto out;
-
-       /*
-        * The VFS has written the file data.  If the inode is unaltered
-        * then we need not start a commit.
-        */
-       if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
-               struct writeback_control wbc = {
-                       .sync_mode = WB_SYNC_ALL,
-                       .nr_to_write = 0, /* sys_fsync did this */
-               };
-               err = sync_inode(inode, &wbc);
-               if (ret == 0)
-                       ret = err;
-       }
-out:
-       if (journal && (journal->j_flags & JBD2_BARRIER))
+       commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
+       if (jbd2_log_start_commit(journal, commit_tid))
+               jbd2_log_wait_commit(journal, commit_tid);
+       else if (journal->j_flags & JBD2_BARRIER)
                blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
        return ret;
 }
index 4e8e2f15b8bd6f96e834826192fec6e0ec09758d..5352db1a3086a075f777652b1ac496a1f2c84aaa 100644 (file)
@@ -70,58 +70,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
        return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
 }
 
-/*
- * The ext4 forget function must perform a revoke if we are freeing data
- * which has been journaled.  Metadata (eg. indirect blocks) must be
- * revoked in all cases.
- *
- * "bh" may be NULL: a metadata block may have been freed from memory
- * but there may still be a record of it in the journal, and that record
- * still needs to be revoked.
- *
- * If the handle isn't valid we're not journaling, but we still need to
- * call into ext4_journal_revoke() to put the buffer head.
- */
-int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
-               struct buffer_head *bh, ext4_fsblk_t blocknr)
-{
-       int err;
-
-       might_sleep();
-
-       BUFFER_TRACE(bh, "enter");
-
-       jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
-                 "data mode %x\n",
-                 bh, is_metadata, inode->i_mode,
-                 test_opt(inode->i_sb, DATA_FLAGS));
-
-       /* Never use the revoke function if we are doing full data
-        * journaling: there is no need to, and a V1 superblock won't
-        * support it.  Otherwise, only skip the revoke on un-journaled
-        * data blocks. */
-
-       if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
-           (!is_metadata && !ext4_should_journal_data(inode))) {
-               if (bh) {
-                       BUFFER_TRACE(bh, "call jbd2_journal_forget");
-                       return ext4_journal_forget(handle, bh);
-               }
-               return 0;
-       }
-
-       /*
-        * data!=journal && (is_metadata || should_journal_data(inode))
-        */
-       BUFFER_TRACE(bh, "call ext4_journal_revoke");
-       err = ext4_journal_revoke(handle, blocknr, bh);
-       if (err)
-               ext4_abort(inode->i_sb, __func__,
-                          "error %d when attempting revoke", err);
-       BUFFER_TRACE(bh, "exit");
-       return err;
-}
-
 /*
  * Work out how many blocks we need to proceed with the next chunk of a
  * truncate transaction.
@@ -721,7 +669,7 @@ allocated:
        return ret;
 failed_out:
        for (i = 0; i < index; i++)
-               ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
+               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
        return ret;
 }
 
@@ -817,14 +765,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
        return err;
 failed:
        /* Allocation failed, free what we already allocated */
+       ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
        for (i = 1; i <= n ; i++) {
-               BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
-               ext4_journal_forget(handle, branch[i].bh);
+               /* 
+                * branch[i].bh is newly allocated, so there is no
+                * need to revoke the block, which is why we don't
+                * need to set EXT4_FREE_BLOCKS_METADATA.
+                */
+               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+                                EXT4_FREE_BLOCKS_FORGET);
        }
-       for (i = 0; i < indirect_blks; i++)
-               ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
+       for (i = n+1; i < indirect_blks; i++)
+               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
 
-       ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
+       ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
 
        return err;
 }
@@ -903,12 +857,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 
 err_out:
        for (i = 1; i <= num; i++) {
-               BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
-               ext4_journal_forget(handle, where[i].bh);
-               ext4_free_blocks(handle, inode,
-                                       le32_to_cpu(where[i-1].key), 1, 0);
+               /* 
+                * branch[i].bh is newly allocated, so there is no
+                * need to revoke the block, which is why we don't
+                * need to set EXT4_FREE_BLOCKS_METADATA.
+                */
+               ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
+                                EXT4_FREE_BLOCKS_FORGET);
        }
-       ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0);
+       ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+                        blks, 0);
 
        return err;
 }
@@ -1021,10 +979,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
        if (!err)
                err = ext4_splice_branch(handle, inode, iblock,
                                         partial, indirect_blks, count);
-       else
+       if (err)
                goto cleanup;
 
        set_buffer_new(bh_result);
+
+       ext4_update_inode_fsync_trans(handle, inode, 1);
 got_it:
        map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
        if (count > blocks_to_boundary)
@@ -1052,7 +1012,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode)
                EXT4_I(inode)->i_reserved_meta_blocks;
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-       return total;
+       return (total << inode->i_blkbits);
 }
 /*
  * Calculate the number of metadata blocks need to reserve
@@ -1534,6 +1494,16 @@ static int do_journal_get_write_access(handle_t *handle,
        return ext4_journal_get_write_access(handle, bh);
 }
 
+/*
+ * Truncate blocks that were not used by write. We have to truncate the
+ * pagecache as well so that corresponding buffers get properly unmapped.
+ */
+static void ext4_truncate_failed_write(struct inode *inode)
+{
+       truncate_inode_pages(inode->i_mapping, inode->i_size);
+       ext4_truncate(inode);
+}
+
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
                            loff_t pos, unsigned len, unsigned flags,
                            struct page **pagep, void **fsdata)
@@ -1599,7 +1569,7 @@ retry:
 
                ext4_journal_stop(handle);
                if (pos + len > inode->i_size) {
-                       ext4_truncate(inode);
+                       ext4_truncate_failed_write(inode);
                        /*
                         * If truncate failed early the inode might
                         * still be on the orphan list; we need to
@@ -1709,7 +1679,7 @@ static int ext4_ordered_write_end(struct file *file,
                ret = ret2;
 
        if (pos + len > inode->i_size) {
-               ext4_truncate(inode);
+               ext4_truncate_failed_write(inode);
                /*
                 * If truncate failed early the inode might still be
                 * on the orphan list; we need to make sure the inode
@@ -1751,7 +1721,7 @@ static int ext4_writeback_write_end(struct file *file,
                ret = ret2;
 
        if (pos + len > inode->i_size) {
-               ext4_truncate(inode);
+               ext4_truncate_failed_write(inode);
                /*
                 * If truncate failed early the inode might still be
                 * on the orphan list; we need to make sure the inode
@@ -1814,7 +1784,7 @@ static int ext4_journalled_write_end(struct file *file,
        if (!ret)
                ret = ret2;
        if (pos + len > inode->i_size) {
-               ext4_truncate(inode);
+               ext4_truncate_failed_write(inode);
                /*
                 * If truncate failed early the inode might still be
                 * on the orphan list; we need to make sure the inode
@@ -2600,7 +2570,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
 }
 
 static int __ext4_journalled_writepage(struct page *page,
-                                      struct writeback_control *wbc,
                                       unsigned int len)
 {
        struct address_space *mapping = page->mapping;
@@ -2758,7 +2727,7 @@ static int ext4_writepage(struct page *page,
                 * doesn't seem much point in redirtying the page here.
                 */
                ClearPageChecked(page);
-               return __ext4_journalled_writepage(page, wbc, len);
+               return __ext4_journalled_writepage(page, len);
        }
 
        if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -2788,7 +2757,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
         * number of contiguous block. So we will limit
         * number of contiguous block to a sane value
         */
-       if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
+       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
            (max_blocks > EXT4_MAX_TRANS_DATA))
                max_blocks = EXT4_MAX_TRANS_DATA;
 
@@ -3091,7 +3060,7 @@ retry:
                 * i_size_read because we hold i_mutex.
                 */
                if (pos + len > inode->i_size)
-                       ext4_truncate(inode);
+                       ext4_truncate_failed_write(inode);
        }
 
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -4120,6 +4089,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
                              __le32 *last)
 {
        __le32 *p;
+       int     flags = EXT4_FREE_BLOCKS_FORGET;
+
+       if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+               flags |= EXT4_FREE_BLOCKS_METADATA;
+
        if (try_to_extend_transaction(handle, inode)) {
                if (bh) {
                        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4134,27 +4108,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
                }
        }
 
-       /*
-        * Any buffers which are on the journal will be in memory. We
-        * find them on the hash table so jbd2_journal_revoke() will
-        * run jbd2_journal_forget() on them.  We've already detached
-        * each block from the file, so bforget() in
-        * jbd2_journal_forget() should be safe.
-        *
-        * AKPM: turn on bforget in jbd2_journal_forget()!!!
-        */
-       for (p = first; p < last; p++) {
-               u32 nr = le32_to_cpu(*p);
-               if (nr) {
-                       struct buffer_head *tbh;
-
-                       *p = 0;
-                       tbh = sb_find_get_block(inode->i_sb, nr);
-                       ext4_forget(handle, 0, inode, tbh, nr);
-               }
-       }
+       for (p = first; p < last; p++)
+               *p = 0;
 
-       ext4_free_blocks(handle, inode, block_to_free, count, 0);
+       ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
 }
 
 /**
@@ -4342,7 +4299,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                                            blocks_for_truncate(inode));
                        }
 
-                       ext4_free_blocks(handle, inode, nr, 1, 1);
+                       ext4_free_blocks(handle, inode, 0, nr, 1,
+                                        EXT4_FREE_BLOCKS_METADATA);
 
                        if (parent_bh) {
                                /*
@@ -4781,8 +4739,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        struct ext4_iloc iloc;
        struct ext4_inode *raw_inode;
        struct ext4_inode_info *ei;
-       struct buffer_head *bh;
        struct inode *inode;
+       journal_t *journal = EXT4_SB(sb)->s_journal;
        long ret;
        int block;
 
@@ -4793,11 +4751,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                return inode;
 
        ei = EXT4_I(inode);
+       iloc.bh = 0;
 
        ret = __ext4_get_inode_loc(inode, &iloc, 0);
        if (ret < 0)
                goto bad_inode;
-       bh = iloc.bh;
        raw_inode = ext4_raw_inode(&iloc);
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
        inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
@@ -4820,7 +4778,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                if (inode->i_mode == 0 ||
                    !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
                        /* this inode is deleted */
-                       brelse(bh);
                        ret = -ESTALE;
                        goto bad_inode;
                }
@@ -4848,11 +4805,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                ei->i_data[block] = raw_inode->i_block[block];
        INIT_LIST_HEAD(&ei->i_orphan);
 
+       /*
+        * Set transaction id's of transactions that have to be committed
+        * to finish f[data]sync. We set them to currently running transaction
+        * as we cannot be sure that the inode or some of its metadata isn't
+        * part of the transaction - the inode could have been reclaimed and
+        * now it is reread from disk.
+        */
+       if (journal) {
+               transaction_t *transaction;
+               tid_t tid;
+
+               spin_lock(&journal->j_state_lock);
+               if (journal->j_running_transaction)
+                       transaction = journal->j_running_transaction;
+               else
+                       transaction = journal->j_committing_transaction;
+               if (transaction)
+                       tid = transaction->t_tid;
+               else
+                       tid = journal->j_commit_sequence;
+               spin_unlock(&journal->j_state_lock);
+               ei->i_sync_tid = tid;
+               ei->i_datasync_tid = tid;
+       }
+
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
                    EXT4_INODE_SIZE(inode->i_sb)) {
-                       brelse(bh);
                        ret = -EIO;
                        goto bad_inode;
                }
@@ -4884,10 +4865,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 
        ret = 0;
        if (ei->i_file_acl &&
-           ((ei->i_file_acl <
-             (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
-              EXT4_SB(sb)->s_gdb_count)) ||
-            (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
+           !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
                ext4_error(sb, __func__,
                           "bad extended attribute block %llu in inode #%lu",
                           ei->i_file_acl, inode->i_ino);
@@ -4905,10 +4883,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                /* Validate block references which are part of inode */
                ret = ext4_check_inode_blockref(inode);
        }
-       if (ret) {
-               brelse(bh);
+       if (ret)
                goto bad_inode;
-       }
 
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext4_file_inode_operations;
@@ -4936,7 +4912,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                        init_special_inode(inode, inode->i_mode,
                           new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
        } else {
-               brelse(bh);
                ret = -EIO;
                ext4_error(inode->i_sb, __func__,
                           "bogus i_mode (%o) for inode=%lu",
@@ -4949,6 +4924,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        return inode;
 
 bad_inode:
+       brelse(iloc.bh);
        iget_failed(inode);
        return ERR_PTR(ret);
 }
@@ -5108,6 +5084,7 @@ static int ext4_do_update_inode(handle_t *handle,
                err = rc;
        ei->i_state &= ~EXT4_STATE_NEW;
 
+       ext4_update_inode_fsync_trans(handle, inode, 0);
 out_brelse:
        brelse(bh);
        ext4_std_error(inode->i_sb, err);
@@ -5227,8 +5204,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
                /* (user+group)*(old+new) structure, inode write (sb,
                 * inode block, ? - but truncate inode update has it) */
-               handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
-                                       EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+               handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+                                       EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
                if (IS_ERR(handle)) {
                        error = PTR_ERR(handle);
                        goto err_out;
index c1cdf613e7258e39f2964768c2d65b6422ce8c96..b63d193126dbf759236a52f075af914bc921d40a 100644 (file)
@@ -221,31 +221,38 @@ setversion_out:
                struct file *donor_filp;
                int err;
 
+               if (!(filp->f_mode & FMODE_READ) ||
+                   !(filp->f_mode & FMODE_WRITE))
+                       return -EBADF;
+
                if (copy_from_user(&me,
                        (struct move_extent __user *)arg, sizeof(me)))
                        return -EFAULT;
+               me.moved_len = 0;
 
                donor_filp = fget(me.donor_fd);
                if (!donor_filp)
                        return -EBADF;
 
-               if (!capable(CAP_DAC_OVERRIDE)) {
-                       if ((current->real_cred->fsuid != inode->i_uid) ||
-                               !(inode->i_mode & S_IRUSR) ||
-                               !(donor_filp->f_dentry->d_inode->i_mode &
-                               S_IRUSR)) {
-                               fput(donor_filp);
-                               return -EACCES;
-                       }
+               if (!(donor_filp->f_mode & FMODE_WRITE)) {
+                       err = -EBADF;
+                       goto mext_out;
                }
 
+               err = mnt_want_write(filp->f_path.mnt);
+               if (err)
+                       goto mext_out;
+
                err = ext4_move_extents(filp, donor_filp, me.orig_start,
                                        me.donor_start, me.len, &me.moved_len);
-               fput(donor_filp);
+               mnt_drop_write(filp->f_path.mnt);
+               if (me.moved_len > 0)
+                       file_remove_suid(donor_filp);
 
                if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
-                       return -EFAULT;
-
+                       err = -EFAULT;
+mext_out:
+               fput(donor_filp);
                return err;
        }
 
index 74e495dabe09a186f27118219400ca615bfc5922..c1e19d5b5985f2a36d39801e697b9b45f7283f50 100644 (file)
@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
        struct ext4_group_info *db;
        int err, count = 0, count2 = 0;
        struct ext4_free_data *entry;
-       ext4_fsblk_t discard_block;
        struct list_head *l, *ltmp;
 
        list_for_each_safe(l, ltmp, &txn->t_private_list) {
@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                        page_cache_release(e4b.bd_bitmap_page);
                }
                ext4_unlock_group(sb, entry->group);
-               discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
-                       + entry->start_blk
-                       + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-               trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
-                                         entry->count);
-               sb_issue_discard(sb, discard_block, entry->count);
-
+               if (test_opt(sb, DISCARD)) {
+                       ext4_fsblk_t discard_block;
+                       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+                       discard_block = (ext4_fsblk_t)entry->group *
+                                               EXT4_BLOCKS_PER_GROUP(sb)
+                                       + entry->start_blk
+                                       + le32_to_cpu(es->s_first_data_block);
+                       trace_ext4_discard_blocks(sb,
+                                       (unsigned long long)discard_block,
+                                       entry->count);
+                       sb_issue_discard(sb, discard_block, entry->count);
+               }
                kmem_cache_free(ext4_free_ext_cachep, entry);
                ext4_mb_release_desc(&e4b);
        }
@@ -3005,6 +3010,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
                trace_ext4_mballoc_prealloc(ac);
 }
 
+/*
+ * Called on failure; free up any blocks from the inode PA for this
+ * context.  We don't need this for MB_GROUP_PA because we only change
+ * pa_free in ext4_mb_release_context(), but on failure, we've already
+ * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
+ */
+static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
+{
+       struct ext4_prealloc_space *pa = ac->ac_pa;
+       int len;
+
+       if (pa && pa->pa_type == MB_INODE_PA) {
+               len = ac->ac_b_ex.fe_len;
+               pa->pa_free += len;
+       }
+
+}
+
 /*
  * use blocks preallocated to inode
  */
@@ -4290,6 +4313,7 @@ repeat:
                        ac->ac_status = AC_STATUS_CONTINUE;
                        goto repeat;
                } else if (*errp) {
+                       ext4_discard_allocated_blocks(ac);
                        ac->ac_b_ex.fe_len = 0;
                        ar->len = 0;
                        ext4_mb_show_ac(ac);
@@ -4422,18 +4446,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
        return 0;
 }
 
-/*
- * Main entry point into mballoc to free blocks
+/**
+ * ext4_free_blocks() -- Free given blocks and update quota
+ * @handle:            handle for this transaction
+ * @inode:             inode
+ * @block:             start physical block to free
+ * @count:             number of blocks to count
+ * @metadata:          Are these metadata blocks
  */
-void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t block, unsigned long count,
-                       int metadata, unsigned long *freed)
+void ext4_free_blocks(handle_t *handle, struct inode *inode,
+                     struct buffer_head *bh, ext4_fsblk_t block,
+                     unsigned long count, int flags)
 {
        struct buffer_head *bitmap_bh = NULL;
        struct super_block *sb = inode->i_sb;
        struct ext4_allocation_context *ac = NULL;
        struct ext4_group_desc *gdp;
        struct ext4_super_block *es;
+       unsigned long freed = 0;
        unsigned int overflow;
        ext4_grpblk_t bit;
        struct buffer_head *gd_bh;
@@ -4443,13 +4473,16 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
        int err = 0;
        int ret;
 
-       *freed = 0;
+       if (bh) {
+               if (block)
+                       BUG_ON(block != bh->b_blocknr);
+               else
+                       block = bh->b_blocknr;
+       }
 
        sbi = EXT4_SB(sb);
        es = EXT4_SB(sb)->s_es;
-       if (block < le32_to_cpu(es->s_first_data_block) ||
-           block + count < block ||
-           block + count > ext4_blocks_count(es)) {
+       if (!ext4_data_block_valid(sbi, block, count)) {
                ext4_error(sb, __func__,
                            "Freeing blocks not in datazone - "
                            "block = %llu, count = %lu", block, count);
@@ -4457,7 +4490,32 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
        }
 
        ext4_debug("freeing block %llu\n", block);
-       trace_ext4_free_blocks(inode, block, count, metadata);
+       trace_ext4_free_blocks(inode, block, count, flags);
+
+       if (flags & EXT4_FREE_BLOCKS_FORGET) {
+               struct buffer_head *tbh = bh;
+               int i;
+
+               BUG_ON(bh && (count > 1));
+
+               for (i = 0; i < count; i++) {
+                       if (!bh)
+                               tbh = sb_find_get_block(inode->i_sb,
+                                                       block + i);
+                       ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 
+                                   inode, tbh, block + i);
+               }
+       }
+
+       /* 
+        * We need to make sure we don't reuse the freed block until
+        * after the transaction is committed, which we can do by
+        * treating the block as metadata, below.  We make an
+        * exception if the inode is to be written in writeback mode
+        * since writeback mode has weak data consistency guarantees.
+        */
+       if (!ext4_should_writeback_data(inode))
+               flags |= EXT4_FREE_BLOCKS_METADATA;
 
        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
        if (ac) {
@@ -4533,7 +4591,8 @@ do_more:
        err = ext4_mb_load_buddy(sb, block_group, &e4b);
        if (err)
                goto error_return;
-       if (metadata && ext4_handle_valid(handle)) {
+
+       if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
                struct ext4_free_data *new_entry;
                /*
                 * blocks being freed are metadata. these blocks shouldn't
@@ -4572,7 +4631,7 @@ do_more:
 
        ext4_mb_release_desc(&e4b);
 
-       *freed += count;
+       freed += count;
 
        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -4592,6 +4651,8 @@ do_more:
        }
        sb->s_dirt = 1;
 error_return:
+       if (freed)
+               vfs_dq_free_block(inode, freed);
        brelse(bitmap_bh);
        ext4_std_error(sb, err);
        if (ac)
index a93d5b80f3e2f02b1c33592cbd1abc93193677a5..81415814b00b06a0455402078cd41588a615a701 100644 (file)
@@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
         * So allocate a credit of 3. We may update
         * quota (user and group).
         */
-       needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
+       needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 
        if (ext4_journal_extend(handle, needed) != 0)
                retval = ext4_journal_restart(handle, needed);
@@ -262,13 +262,17 @@ static int free_dind_blocks(handle_t *handle,
        for (i = 0; i < max_entries; i++) {
                if (tmp_idata[i]) {
                        extend_credit_for_blkdel(handle, inode);
-                       ext4_free_blocks(handle, inode,
-                                       le32_to_cpu(tmp_idata[i]), 1, 1);
+                       ext4_free_blocks(handle, inode, 0,
+                                        le32_to_cpu(tmp_idata[i]), 1,
+                                        EXT4_FREE_BLOCKS_METADATA |
+                                        EXT4_FREE_BLOCKS_FORGET);
                }
        }
        put_bh(bh);
        extend_credit_for_blkdel(handle, inode);
-       ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
+       ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+                        EXT4_FREE_BLOCKS_METADATA |
+                        EXT4_FREE_BLOCKS_FORGET);
        return 0;
 }
 
@@ -297,7 +301,9 @@ static int free_tind_blocks(handle_t *handle,
        }
        put_bh(bh);
        extend_credit_for_blkdel(handle, inode);
-       ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
+       ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+                        EXT4_FREE_BLOCKS_METADATA |
+                        EXT4_FREE_BLOCKS_FORGET);
        return 0;
 }
 
@@ -308,8 +314,10 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
        /* ei->i_data[EXT4_IND_BLOCK] */
        if (i_data[0]) {
                extend_credit_for_blkdel(handle, inode);
-               ext4_free_blocks(handle, inode,
-                               le32_to_cpu(i_data[0]), 1, 1);
+               ext4_free_blocks(handle, inode, 0,
+                               le32_to_cpu(i_data[0]), 1,
+                                EXT4_FREE_BLOCKS_METADATA |
+                                EXT4_FREE_BLOCKS_FORGET);
        }
 
        /* ei->i_data[EXT4_DIND_BLOCK] */
@@ -419,7 +427,8 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
        }
        put_bh(bh);
        extend_credit_for_blkdel(handle, inode);
-       ext4_free_blocks(handle, inode, block, 1, 1);
+       ext4_free_blocks(handle, inode, 0, block, 1,
+                        EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return retval;
 }
 
@@ -477,7 +486,7 @@ int ext4_ext_migrate(struct inode *inode)
        handle = ext4_journal_start(inode,
                                        EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-                                       2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
+                                       EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
                                        + 1);
        if (IS_ERR(handle)) {
                retval = PTR_ERR(handle);
index 25b6b1457360d2ed1df3ab943a99bac01d4a5e11..82c415be87a46be61a36dc205d24c47955937e03 100644 (file)
@@ -77,12 +77,14 @@ static int
 mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
                      struct ext4_extent **extent)
 {
+       struct ext4_extent_header *eh;
        int ppos, leaf_ppos = path->p_depth;
 
        ppos = leaf_ppos;
        if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
                /* leaf block */
                *extent = ++path[ppos].p_ext;
+               path[ppos].p_block = ext_pblock(path[ppos].p_ext);
                return 0;
        }
 
@@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
                                        ext_block_hdr(path[cur_ppos+1].p_bh);
                        }
 
+                       path[leaf_ppos].p_ext = *extent = NULL;
+
+                       eh = path[leaf_ppos].p_hdr;
+                       if (le16_to_cpu(eh->eh_entries) == 0)
+                               /* empty leaf is found */
+                               return -ENODATA;
+
                        /* leaf block */
                        path[leaf_ppos].p_ext = *extent =
                                EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
+                       path[leaf_ppos].p_block =
+                                       ext_pblock(path[leaf_ppos].p_ext);
                        return 0;
                }
        }
@@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
 }
 
 /**
- * mext_double_down_read - Acquire two inodes' read semaphore
- *
- * @orig_inode:                original inode structure
- * @donor_inode:       donor inode structure
- * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
- */
-static void
-mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
-{
-       struct inode *first = orig_inode, *second = donor_inode;
-
-       /*
-        * Use the inode number to provide the stable locking order instead
-        * of its address, because the C language doesn't guarantee you can
-        * compare pointers that don't come from the same array.
-        */
-       if (donor_inode->i_ino < orig_inode->i_ino) {
-               first = donor_inode;
-               second = orig_inode;
-       }
-
-       down_read(&EXT4_I(first)->i_data_sem);
-       down_read(&EXT4_I(second)->i_data_sem);
-}
-
-/**
- * mext_double_down_write - Acquire two inodes' write semaphore
+ * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
  *
  * @orig_inode:                original inode structure
  * @donor_inode:       donor inode structure
- * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
+ * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
+ * i_ino order.
  */
 static void
-mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
+double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
 {
        struct inode *first = orig_inode, *second = donor_inode;
 
@@ -203,32 +189,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
        }
 
        down_write(&EXT4_I(first)->i_data_sem);
-       down_write(&EXT4_I(second)->i_data_sem);
+       down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
 }
 
 /**
- * mext_double_up_read - Release two inodes' read semaphore
+ * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
  *
  * @orig_inode:                original inode structure to be released its lock first
  * @donor_inode:       donor inode structure to be released its lock second
- * Release read semaphore of two inodes (orig and donor).
+ * Release write lock of i_data_sem of two inodes (orig and donor).
  */
 static void
-mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
-{
-       up_read(&EXT4_I(orig_inode)->i_data_sem);
-       up_read(&EXT4_I(donor_inode)->i_data_sem);
-}
-
-/**
- * mext_double_up_write - Release two inodes' write semaphore
- *
- * @orig_inode:                original inode structure to be released its lock first
- * @donor_inode:       donor inode structure to be released its lock second
- * Release write semaphore of two inodes (orig and donor).
- */
-static void
-mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
+double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
 {
        up_write(&EXT4_I(orig_inode)->i_data_sem);
        up_write(&EXT4_I(donor_inode)->i_data_sem);
@@ -596,7 +568,7 @@ out:
  * @tmp_oext:          the extent that will belong to the donor inode
  * @orig_off:          block offset of original inode
  * @donor_off:         block offset of donor inode
- * @max_count:         the maximun length of extents
+ * @max_count:         the maximum length of extents
  *
  * Return 0 on success, or a negative error value on failure.
  */
@@ -661,6 +633,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
  * @donor_inode:       donor inode
  * @from:              block offset of orig_inode
  * @count:             block count to be replaced
+ * @err:               pointer to save return value
  *
  * Replace original inode extents and donor inode extents page by page.
  * We implement this replacement in the following three steps:
@@ -671,33 +644,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
  * 3. Change the block information of donor inode to point at the saved
  *    original inode blocks in the dummy extents.
  *
- * Return 0 on success, or a negative error value on failure.
+ * Return replaced block count.
  */
 static int
 mext_replace_branches(handle_t *handle, struct inode *orig_inode,
                           struct inode *donor_inode, ext4_lblk_t from,
-                          ext4_lblk_t count)
+                          ext4_lblk_t count, int *err)
 {
        struct ext4_ext_path *orig_path = NULL;
        struct ext4_ext_path *donor_path = NULL;
        struct ext4_extent *oext, *dext;
        struct ext4_extent tmp_dext, tmp_oext;
        ext4_lblk_t orig_off = from, donor_off = from;
-       int err = 0;
        int depth;
        int replaced_count = 0;
        int dext_alen;
 
-       mext_double_down_write(orig_inode, donor_inode);
+       /* Protect extent trees against block allocations via delalloc */
+       double_down_write_data_sem(orig_inode, donor_inode);
 
        /* Get the original extent for the block "orig_off" */
-       err = get_ext_path(orig_inode, orig_off, &orig_path);
-       if (err)
+       *err = get_ext_path(orig_inode, orig_off, &orig_path);
+       if (*err)
                goto out;
 
        /* Get the donor extent for the head */
-       err = get_ext_path(donor_inode, donor_off, &donor_path);
-       if (err)
+       *err = get_ext_path(donor_inode, donor_off, &donor_path);
+       if (*err)
                goto out;
        depth = ext_depth(orig_inode);
        oext = orig_path[depth].p_ext;
@@ -707,9 +680,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
        dext = donor_path[depth].p_ext;
        tmp_dext = *dext;
 
-       err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+       *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
                                      donor_off, count);
-       if (err)
+       if (*err)
                goto out;
 
        /* Loop for the donor extents */
@@ -718,7 +691,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
                if (!dext) {
                        ext4_error(donor_inode->i_sb, __func__,
                                   "The extent for donor must be found");
-                       err = -EIO;
+                       *err = -EIO;
                        goto out;
                } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
                        ext4_error(donor_inode->i_sb, __func__,
@@ -726,20 +699,20 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
                                "extent(%u) should be equal",
                                donor_off,
                                le32_to_cpu(tmp_dext.ee_block));
-                       err = -EIO;
+                       *err = -EIO;
                        goto out;
                }
 
                /* Set donor extent to orig extent */
-               err = mext_leaf_block(handle, orig_inode,
+               *err = mext_leaf_block(handle, orig_inode,
                                           orig_path, &tmp_dext, &orig_off);
-               if (err < 0)
+               if (*err)
                        goto out;
 
                /* Set orig extent to donor extent */
-               err = mext_leaf_block(handle, donor_inode,
+               *err = mext_leaf_block(handle, donor_inode,
                                           donor_path, &tmp_oext, &donor_off);
-               if (err < 0)
+               if (*err)
                        goto out;
 
                dext_alen = ext4_ext_get_actual_len(&tmp_dext);
@@ -753,35 +726,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
 
                if (orig_path)
                        ext4_ext_drop_refs(orig_path);
-               err = get_ext_path(orig_inode, orig_off, &orig_path);
-               if (err)
+               *err = get_ext_path(orig_inode, orig_off, &orig_path);
+               if (*err)
                        goto out;
                depth = ext_depth(orig_inode);
                oext = orig_path[depth].p_ext;
-               if (le32_to_cpu(oext->ee_block) +
-                               ext4_ext_get_actual_len(oext) <= orig_off) {
-                       err = 0;
-                       goto out;
-               }
                tmp_oext = *oext;
 
                if (donor_path)
                        ext4_ext_drop_refs(donor_path);
-               err = get_ext_path(donor_inode, donor_off, &donor_path);
-               if (err)
+               *err = get_ext_path(donor_inode, donor_off, &donor_path);
+               if (*err)
                        goto out;
                depth = ext_depth(donor_inode);
                dext = donor_path[depth].p_ext;
-               if (le32_to_cpu(dext->ee_block) +
-                               ext4_ext_get_actual_len(dext) <= donor_off) {
-                       err = 0;
-                       goto out;
-               }
                tmp_dext = *dext;
 
-               err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+               *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
                                           donor_off, count - replaced_count);
-               if (err)
+               if (*err)
                        goto out;
        }
 
@@ -795,8 +758,12 @@ out:
                kfree(donor_path);
        }
 
-       mext_double_up_write(orig_inode, donor_inode);
-       return err;
+       ext4_ext_invalidate_cache(orig_inode);
+       ext4_ext_invalidate_cache(donor_inode);
+
+       double_up_write_data_sem(orig_inode, donor_inode);
+
+       return replaced_count;
 }
 
 /**
@@ -808,16 +775,17 @@ out:
  * @data_offset_in_page:       block index where data swapping starts
  * @block_len_in_page:         the number of blocks to be swapped
  * @uninit:                    orig extent is uninitialized or not
+ * @err:                       pointer to save return value
  *
  * Save the data in original inode blocks and replace original inode extents
  * with donor inode extents by calling mext_replace_branches().
- * Finally, write out the saved data in new original inode blocks. Return 0
- * on success, or a negative error value on failure.
+ * Finally, write out the saved data in new original inode blocks. Return
+ * replaced block count.
  */
 static int
 move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
                  pgoff_t orig_page_offset, int data_offset_in_page,
-                 int block_len_in_page, int uninit)
+                 int block_len_in_page, int uninit, int *err)
 {
        struct inode *orig_inode = o_filp->f_dentry->d_inode;
        struct address_space *mapping = orig_inode->i_mapping;
@@ -829,9 +797,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
        long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
        unsigned long blocksize = orig_inode->i_sb->s_blocksize;
        unsigned int w_flags = 0;
-       unsigned int tmp_data_len, data_len;
+       unsigned int tmp_data_size, data_size, replaced_size;
        void *fsdata;
-       int ret, i, jblocks;
+       int i, jblocks;
+       int err2 = 0;
+       int replaced_count = 0;
        int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
 
        /*
@@ -841,8 +811,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
        jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
        handle = ext4_journal_start(orig_inode, jblocks);
        if (IS_ERR(handle)) {
-               ret = PTR_ERR(handle);
-               return ret;
+               *err = PTR_ERR(handle);
+               return 0;
        }
 
        if (segment_eq(get_fs(), KERNEL_DS))
@@ -858,39 +828,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
         * Just swap data blocks between orig and donor.
         */
        if (uninit) {
-               ret = mext_replace_branches(handle, orig_inode,
-                                                donor_inode, orig_blk_offset,
-                                                block_len_in_page);
-
-               /* Clear the inode cache not to refer to the old data */
-               ext4_ext_invalidate_cache(orig_inode);
-               ext4_ext_invalidate_cache(donor_inode);
+               replaced_count = mext_replace_branches(handle, orig_inode,
+                                               donor_inode, orig_blk_offset,
+                                               block_len_in_page, err);
                goto out2;
        }
 
        offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
 
-       /* Calculate data_len */
+       /* Calculate data_size */
        if ((orig_blk_offset + block_len_in_page - 1) ==
            ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
                /* Replace the last block */
-               tmp_data_len = orig_inode->i_size & (blocksize - 1);
+               tmp_data_size = orig_inode->i_size & (blocksize - 1);
                /*
-                * If data_len equal zero, it shows data_len is multiples of
+                * If data_size equal zero, it shows data_size is multiples of
                 * blocksize. So we set appropriate value.
                 */
-               if (tmp_data_len == 0)
-                       tmp_data_len = blocksize;
+               if (tmp_data_size == 0)
+                       tmp_data_size = blocksize;
 
-               data_len = tmp_data_len +
+               data_size = tmp_data_size +
                        ((block_len_in_page - 1) << orig_inode->i_blkbits);
-       } else {
-               data_len = block_len_in_page << orig_inode->i_blkbits;
-       }
+       } else
+               data_size = block_len_in_page << orig_inode->i_blkbits;
+
+       replaced_size = data_size;
 
-       ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
+       *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
                                 &page, &fsdata);
-       if (unlikely(ret < 0))
+       if (unlikely(*err < 0))
                goto out;
 
        if (!PageUptodate(page)) {
@@ -911,14 +878,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
        /* Release old bh and drop refs */
        try_to_release_page(page, 0);
 
-       ret = mext_replace_branches(handle, orig_inode, donor_inode,
-                                        orig_blk_offset, block_len_in_page);
-       if (ret < 0)
-               goto out;
-
-       /* Clear the inode cache not to refer to the old data */
-       ext4_ext_invalidate_cache(orig_inode);
-       ext4_ext_invalidate_cache(donor_inode);
+       replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
+                                       orig_blk_offset, block_len_in_page,
+                                       &err2);
+       if (err2) {
+               if (replaced_count) {
+                       block_len_in_page = replaced_count;
+                       replaced_size =
+                               block_len_in_page << orig_inode->i_blkbits;
+               } else
+                       goto out;
+       }
 
        if (!page_has_buffers(page))
                create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
@@ -928,16 +898,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
                bh = bh->b_this_page;
 
        for (i = 0; i < block_len_in_page; i++) {
-               ret = ext4_get_block(orig_inode,
+               *err = ext4_get_block(orig_inode,
                                (sector_t)(orig_blk_offset + i), bh, 0);
-               if (ret < 0)
+               if (*err < 0)
                        goto out;
 
                if (bh->b_this_page != NULL)
                        bh = bh->b_this_page;
        }
 
-       ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
+       *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
                               page, fsdata);
        page = NULL;
 
@@ -951,7 +921,10 @@ out:
 out2:
        ext4_journal_stop(handle);
 
-       return ret < 0 ? ret : 0;
+       if (err2)
+               *err = err2;
+
+       return replaced_count;
 }
 
 /**
@@ -962,7 +935,6 @@ out2:
  * @orig_start:                logical start offset in block for orig
  * @donor_start:       logical start offset in block for donor
  * @len:               the number of blocks to be moved
- * @moved_len:         moved block length
  *
  * Check the arguments of ext4_move_extents() whether the files can be
  * exchanged with each other.
@@ -970,8 +942,8 @@ out2:
  */
 static int
 mext_check_arguments(struct inode *orig_inode,
-                         struct inode *donor_inode, __u64 orig_start,
-                         __u64 donor_start, __u64 *len, __u64 moved_len)
+                    struct inode *donor_inode, __u64 orig_start,
+                    __u64 donor_start, __u64 *len)
 {
        ext4_lblk_t orig_blocks, donor_blocks;
        unsigned int blkbits = orig_inode->i_blkbits;
@@ -985,6 +957,13 @@ mext_check_arguments(struct inode *orig_inode,
                return -EINVAL;
        }
 
+       if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
+               ext4_debug("ext4 move extent: suid or sgid is set"
+                          " to donor file [ino:orig %lu, donor %lu]\n",
+                          orig_inode->i_ino, donor_inode->i_ino);
+               return -EINVAL;
+       }
+
        /* Ext4 move extent does not support swapfile */
        if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
                ext4_debug("ext4 move extent: The argument files should "
@@ -1025,13 +1004,6 @@ mext_check_arguments(struct inode *orig_inode,
                return -EINVAL;
        }
 
-       if (moved_len) {
-               ext4_debug("ext4 move extent: moved_len should be 0 "
-                       "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
-                       donor_inode->i_ino);
-               return -EINVAL;
-       }
-
        if ((orig_start > EXT_MAX_BLOCK) ||
            (donor_start > EXT_MAX_BLOCK) ||
            (*len > EXT_MAX_BLOCK) ||
@@ -1088,7 +1060,7 @@ mext_check_arguments(struct inode *orig_inode,
        }
 
        if (!*len) {
-               ext4_debug("ext4 move extent: len shoudld not be 0 "
+               ext4_debug("ext4 move extent: len should not be 0 "
                        "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
                        donor_inode->i_ino);
                return -EINVAL;
@@ -1232,16 +1204,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
                return -EINVAL;
        }
 
-       /* protect orig and donor against a truncate */
+       /* Protect orig and donor inodes against a truncate */
        ret1 = mext_inode_double_lock(orig_inode, donor_inode);
        if (ret1 < 0)
                return ret1;
 
-       mext_double_down_read(orig_inode, donor_inode);
+       /* Protect extent tree against block allocations via delalloc */
+       double_down_write_data_sem(orig_inode, donor_inode);
        /* Check the filesystem environment whether move_extent can be done */
        ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
-                                       donor_start, &len, *moved_len);
-       mext_double_up_read(orig_inode, donor_inode);
+                                   donor_start, &len);
        if (ret1)
                goto out;
 
@@ -1355,36 +1327,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
                seq_start = le32_to_cpu(ext_cur->ee_block);
                rest_blocks = seq_blocks;
 
-               /* Discard preallocations of two inodes */
-               down_write(&EXT4_I(orig_inode)->i_data_sem);
-               ext4_discard_preallocations(orig_inode);
-               up_write(&EXT4_I(orig_inode)->i_data_sem);
-
-               down_write(&EXT4_I(donor_inode)->i_data_sem);
-               ext4_discard_preallocations(donor_inode);
-               up_write(&EXT4_I(donor_inode)->i_data_sem);
+               /*
+                * Up semaphore to avoid following problems:
+                * a. transaction deadlock among ext4_journal_start,
+                *    ->write_begin via pagefault, and jbd2_journal_commit
+                * b. racing with ->readpage, ->write_begin, and ext4_get_block
+                *    in move_extent_per_page
+                */
+               double_up_write_data_sem(orig_inode, donor_inode);
 
                while (orig_page_offset <= seq_end_page) {
 
                        /* Swap original branches with new branches */
-                       ret1 = move_extent_per_page(o_filp, donor_inode,
+                       block_len_in_page = move_extent_per_page(
+                                               o_filp, donor_inode,
                                                orig_page_offset,
                                                data_offset_in_page,
-                                               block_len_in_page, uninit);
-                       if (ret1 < 0)
-                               goto out;
-                       orig_page_offset++;
+                                               block_len_in_page, uninit,
+                                               &ret1);
+
                        /* Count how many blocks we have exchanged */
                        *moved_len += block_len_in_page;
+                       if (ret1 < 0)
+                               break;
                        if (*moved_len > len) {
                                ext4_error(orig_inode->i_sb, __func__,
                                        "We replaced blocks too much! "
                                        "sum of replaced: %llu requested: %llu",
                                        *moved_len, len);
                                ret1 = -EIO;
-                               goto out;
+                               break;
                        }
 
+                       orig_page_offset++;
                        data_offset_in_page = 0;
                        rest_blocks -= block_len_in_page;
                        if (rest_blocks > blocks_per_page)
@@ -1393,6 +1368,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
                                block_len_in_page = rest_blocks;
                }
 
+               double_down_write_data_sem(orig_inode, donor_inode);
+               if (ret1 < 0)
+                       break;
+
                /* Decrease buffer counter */
                if (holecheck_path)
                        ext4_ext_drop_refs(holecheck_path);
@@ -1414,6 +1393,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 
        }
 out:
+       if (*moved_len) {
+               ext4_discard_preallocations(orig_inode);
+               ext4_discard_preallocations(donor_inode);
+       }
+
        if (orig_path) {
                ext4_ext_drop_refs(orig_path);
                kfree(orig_path);
@@ -1422,7 +1406,7 @@ out:
                ext4_ext_drop_refs(holecheck_path);
                kfree(holecheck_path);
        }
-
+       double_up_write_data_sem(orig_inode, donor_inode);
        ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
 
        if (ret1)
index 6d2c1b897fc778bd6a08d680aef414a97ac5aa5e..17a17e10dd605198ddfe2dfed530d4cfff6812c1 100644 (file)
@@ -1292,9 +1292,6 @@ errout:
  * add_dirent_to_buf will attempt search the directory block for
  * space.  It will return -ENOSPC if no space is available, and -EIO
  * and -EEXIST if directory entry already exists.
- *
- * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
- * all other cases bh is released.
  */
 static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                             struct inode *inode, struct ext4_dir_entry_2 *de,
@@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                top = bh->b_data + blocksize - reclen;
                while ((char *) de <= top) {
                        if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
-                                                 bh, offset)) {
-                               brelse(bh);
+                                                 bh, offset))
                                return -EIO;
-                       }
-                       if (ext4_match(namelen, name, de)) {
-                               brelse(bh);
+                       if (ext4_match(namelen, name, de))
                                return -EEXIST;
-                       }
                        nlen = EXT4_DIR_REC_LEN(de->name_len);
                        rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
                        if ((de->inode? rlen - nlen: rlen) >= reclen)
@@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
        err = ext4_journal_get_write_access(handle, bh);
        if (err) {
                ext4_std_error(dir->i_sb, err);
-               brelse(bh);
                return err;
        }
 
@@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
        err = ext4_handle_dirty_metadata(handle, dir, bh);
        if (err)
                ext4_std_error(dir->i_sb, err);
-       brelse(bh);
        return 0;
 }
 
@@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        if (!(de))
                return retval;
 
-       return add_dirent_to_buf(handle, dentry, inode, de, bh);
+       retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
+       brelse(bh);
+       return retval;
 }
 
 /*
@@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                if(!bh)
                        return retval;
                retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
-               if (retval != -ENOSPC)
+               if (retval != -ENOSPC) {
+                       brelse(bh);
                        return retval;
+               }
 
                if (blocks == 1 && !dx_fallback &&
                    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
@@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        de = (struct ext4_dir_entry_2 *) bh->b_data;
        de->inode = 0;
        de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
-       return add_dirent_to_buf(handle, dentry, inode, de, bh);
+       retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
+       brelse(bh);
+       return retval;
 }
 
 /*
@@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                goto journal_error;
 
        err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
-       if (err != -ENOSPC) {
-               bh = NULL;
+       if (err != -ENOSPC)
                goto cleanup;
-       }
 
        /* Block full, should compress but for now just split */
        dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
@@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
        if (!de)
                goto cleanup;
        err = add_dirent_to_buf(handle, dentry, inode, de, bh);
-       bh = NULL;
        goto cleanup;
 
 journal_error:
@@ -1775,7 +1769,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
 retry:
        handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-                                       2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
+                                       EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
 
@@ -1809,7 +1803,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
 retry:
        handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-                                       2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
+                                       EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
 
@@ -1846,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 retry:
        handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-                                       2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
+                                       EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
 
@@ -2259,7 +2253,7 @@ static int ext4_symlink(struct inode *dir,
 retry:
        handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-                                       2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
+                                       EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
 
index 3cfc343c41b53e3eae9e3d635519c653f600acc6..3b2c5541d8a686fe7c780a512c0acd3e6771b3ab 100644 (file)
@@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct super_block *sb,
                        goto exit_bh;
 
                if (IS_ERR(gdb = bclean(handle, sb, block))) {
-                       err = PTR_ERR(bh);
+                       err = PTR_ERR(gdb);
                        goto exit_bh;
                }
                ext4_handle_dirty_metadata(handle, NULL, gdb);
index d4ca92aab51411de0bab1bf8824e9aa8974b8d17..8b58a144c31b8210f502b34d532a131aaccffce2 100644 (file)
@@ -603,10 +603,6 @@ static void ext4_put_super(struct super_block *sb)
        if (sb->s_dirt)
                ext4_commit_super(sb, 1);
 
-       ext4_release_system_zone(sb);
-       ext4_mb_release(sb);
-       ext4_ext_release(sb);
-       ext4_xattr_put_super(sb);
        if (sbi->s_journal) {
                err = jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
@@ -614,6 +610,12 @@ static void ext4_put_super(struct super_block *sb)
                        ext4_abort(sb, __func__,
                                   "Couldn't clean up the journal");
        }
+
+       ext4_release_system_zone(sb);
+       ext4_mb_release(sb);
+       ext4_ext_release(sb);
+       ext4_xattr_put_super(sb);
+
        if (!(sb->s_flags & MS_RDONLY)) {
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -704,6 +706,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        spin_lock_init(&(ei->i_block_reservation_lock));
        INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
        ei->cur_aio_dio = NULL;
+       ei->i_sync_tid = 0;
+       ei->i_datasync_tid = 0;
 
        return &ei->vfs_inode;
 }
@@ -899,6 +903,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
        if (test_opt(sb, NO_AUTO_DA_ALLOC))
                seq_puts(seq, ",noauto_da_alloc");
 
+       if (test_opt(sb, DISCARD))
+               seq_puts(seq, ",discard");
+
+       if (test_opt(sb, NOLOAD))
+               seq_puts(seq, ",norecovery");
+
        ext4_show_quota_options(seq, sb);
 
        return 0;
@@ -1079,7 +1089,8 @@ enum {
        Opt_usrquota, Opt_grpquota, Opt_i_version,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc,
        Opt_block_validity, Opt_noblock_validity,
-       Opt_inode_readahead_blks, Opt_journal_ioprio
+       Opt_inode_readahead_blks, Opt_journal_ioprio,
+       Opt_discard, Opt_nodiscard,
 };
 
 static const match_table_t tokens = {
@@ -1104,6 +1115,7 @@ static const match_table_t tokens = {
        {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
        {Opt_noload, "noload"},
+       {Opt_noload, "norecovery"},
        {Opt_nobh, "nobh"},
        {Opt_bh, "bh"},
        {Opt_commit, "commit=%u"},
@@ -1144,6 +1156,8 @@ static const match_table_t tokens = {
        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc"},
        {Opt_noauto_da_alloc, "noauto_da_alloc"},
+       {Opt_discard, "discard"},
+       {Opt_nodiscard, "nodiscard"},
        {Opt_err, NULL},
 };
 
@@ -1565,6 +1579,12 @@ set_qf_format:
                        else
                                set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
                        break;
+               case Opt_discard:
+                       set_opt(sbi->s_mount_opt, DISCARD);
+                       break;
+               case Opt_nodiscard:
+                       clear_opt(sbi->s_mount_opt, DISCARD);
+                       break;
                default:
                        ext4_msg(sb, KERN_ERR,
                               "Unrecognized mount option \"%s\" "
@@ -1673,14 +1693,14 @@ static int ext4_fill_flex_info(struct super_block *sb)
        size_t size;
        int i;
 
-       if (!sbi->s_es->s_log_groups_per_flex) {
+       sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
+       groups_per_flex = 1 << sbi->s_log_groups_per_flex;
+
+       if (groups_per_flex < 2) {
                sbi->s_log_groups_per_flex = 0;
                return 1;
        }
 
-       sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
-       groups_per_flex = 1 << sbi->s_log_groups_per_flex;
-
        /* We allocate both existing and potentially added groups */
        flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
                        ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
@@ -2721,26 +2741,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
                if (ext4_load_journal(sb, es, journal_devnum))
                        goto failed_mount3;
-               if (!(sb->s_flags & MS_RDONLY) &&
-                   EXT4_SB(sb)->s_journal->j_failed_commit) {
-                       ext4_msg(sb, KERN_CRIT, "error: "
-                              "ext4_fill_super: Journal transaction "
-                              "%u is corrupt",
-                              EXT4_SB(sb)->s_journal->j_failed_commit);
-                       if (test_opt(sb, ERRORS_RO)) {
-                               ext4_msg(sb, KERN_CRIT,
-                                      "Mounting filesystem read-only");
-                               sb->s_flags |= MS_RDONLY;
-                               EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-                               es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-                       }
-                       if (test_opt(sb, ERRORS_PANIC)) {
-                               EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-                               es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-                               ext4_commit_super(sb, 1);
-                               goto failed_mount4;
-                       }
-               }
        } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
              EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
                ext4_msg(sb, KERN_ERR, "required journal recovery "
@@ -3668,13 +3668,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
        buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
                       percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
-       ext4_free_blocks_count_set(es, buf->f_bfree);
        buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
        if (buf->f_bfree < ext4_r_blocks_count(es))
                buf->f_bavail = 0;
        buf->f_files = le32_to_cpu(es->s_inodes_count);
        buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
-       es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
        buf->f_namelen = EXT4_NAME_LEN;
        fsid = le64_to_cpup((void *)es->s_uuid) ^
               le64_to_cpup((void *)es->s_uuid + sizeof(u64));
@@ -3966,6 +3964,58 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags,
        return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
 }
 
+#if !defined(CONTIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+static struct file_system_type ext2_fs_type = {
+       .owner          = THIS_MODULE,
+       .name           = "ext2",
+       .get_sb         = ext4_get_sb,
+       .kill_sb        = kill_block_super,
+       .fs_flags       = FS_REQUIRES_DEV,
+};
+
+static inline void register_as_ext2(void)
+{
+       int err = register_filesystem(&ext2_fs_type);
+       if (err)
+               printk(KERN_WARNING
+                      "EXT4-fs: Unable to register as ext2 (%d)\n", err);
+}
+
+static inline void unregister_as_ext2(void)
+{
+       unregister_filesystem(&ext2_fs_type);
+}
+#else
+static inline void register_as_ext2(void) { }
+static inline void unregister_as_ext2(void) { }
+#endif
+
+#if !defined(CONTIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+static struct file_system_type ext3_fs_type = {
+       .owner          = THIS_MODULE,
+       .name           = "ext3",
+       .get_sb         = ext4_get_sb,
+       .kill_sb        = kill_block_super,
+       .fs_flags       = FS_REQUIRES_DEV,
+};
+
+static inline void register_as_ext3(void)
+{
+       int err = register_filesystem(&ext3_fs_type);
+       if (err)
+               printk(KERN_WARNING
+                      "EXT4-fs: Unable to register as ext3 (%d)\n", err);
+}
+
+static inline void unregister_as_ext3(void)
+{
+       unregister_filesystem(&ext3_fs_type);
+}
+#else
+static inline void register_as_ext3(void) { }
+static inline void unregister_as_ext3(void) { }
+#endif
+
 static struct file_system_type ext4_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext4",
@@ -3995,11 +4045,15 @@ static int __init init_ext4_fs(void)
        err = init_inodecache();
        if (err)
                goto out1;
+       register_as_ext2();
+       register_as_ext3();
        err = register_filesystem(&ext4_fs_type);
        if (err)
                goto out;
        return 0;
 out:
+       unregister_as_ext2();
+       unregister_as_ext3();
        destroy_inodecache();
 out1:
        exit_ext4_xattr();
@@ -4015,6 +4069,8 @@ out4:
 
 static void __exit exit_ext4_fs(void)
 {
+       unregister_as_ext2();
+       unregister_as_ext3();
        unregister_filesystem(&ext4_fs_type);
        destroy_inodecache();
        exit_ext4_xattr();
index fed5b01d7a8daa38cae14572ff7f43f3a1860077..910bf9a59cb39c6018d7c7f4d31c0bc00ef26749 100644 (file)
@@ -482,9 +482,10 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
                ea_bdebug(bh, "refcount now=0; freeing");
                if (ce)
                        mb_cache_entry_free(ce);
-               ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
                get_bh(bh);
-               ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
+               ext4_free_blocks(handle, inode, bh, 0, 1,
+                                EXT4_FREE_BLOCKS_METADATA |
+                                EXT4_FREE_BLOCKS_FORGET);
        } else {
                le32_add_cpu(&BHDR(bh)->h_refcount, -1);
                error = ext4_handle_dirty_metadata(handle, inode, bh);
@@ -832,7 +833,8 @@ inserted:
                        new_bh = sb_getblk(sb, block);
                        if (!new_bh) {
 getblk_failed:
-                               ext4_free_blocks(handle, inode, block, 1, 1);
+                               ext4_free_blocks(handle, inode, 0, block, 1,
+                                                EXT4_FREE_BLOCKS_METADATA);
                                error = -EIO;
                                goto cleanup;
                        }
@@ -988,6 +990,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
        if (error)
                goto cleanup;
 
+       error = ext4_journal_get_write_access(handle, is.iloc.bh);
+       if (error)
+               goto cleanup;
+
        if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
                struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
                memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
@@ -1013,9 +1019,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
                if (flags & XATTR_CREATE)
                        goto cleanup;
        }
-       error = ext4_journal_get_write_access(handle, is.iloc.bh);
-       if (error)
-               goto cleanup;
        if (!value) {
                if (!is.s.not_found)
                        error = ext4_xattr_ibody_set(handle, inode, &i, &is);
index d4cfd6d2779e07fe1d5381975840ccb3e3486566..8896c1d4febe590e1e6941d76f62833a04813201 100644 (file)
@@ -636,6 +636,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                JBUFFER_TRACE(jh, "ph3: write metadata");
                flags = jbd2_journal_write_metadata_buffer(commit_transaction,
                                                      jh, &new_jh, blocknr);
+               if (flags < 0) {
+                       jbd2_journal_abort(journal, flags);
+                       continue;
+               }
                set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
                wbuf[bufs++] = jh2bh(new_jh);
 
index fed85388ee86434509805091a933b34091c50596..b7ca3a92a4dba0b23f555a75cac79fea1b6ae9e4 100644 (file)
@@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno);
 EXPORT_SYMBOL(jbd2_journal_ack_err);
 EXPORT_SYMBOL(jbd2_journal_clear_err);
 EXPORT_SYMBOL(jbd2_log_wait_commit);
+EXPORT_SYMBOL(jbd2_log_start_commit);
 EXPORT_SYMBOL(jbd2_journal_start_commit);
 EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
 EXPORT_SYMBOL(jbd2_journal_wipe);
@@ -358,6 +359,10 @@ repeat:
 
                jbd_unlock_bh_state(bh_in);
                tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
+               if (!tmp) {
+                       jbd2_journal_put_journal_head(new_jh);
+                       return -ENOMEM;
+               }
                jbd_lock_bh_state(bh_in);
                if (jh_in->b_frozen_data) {
                        jbd2_free(tmp, bh_in->b_size);
@@ -1248,6 +1253,13 @@ int jbd2_journal_load(journal_t *journal)
        if (jbd2_journal_recover(journal))
                goto recovery_error;
 
+       if (journal->j_failed_commit) {
+               printk(KERN_ERR "JBD2: journal transaction %u on %s "
+                      "is corrupt.\n", journal->j_failed_commit,
+                      journal->j_devname);
+               return -EIO;
+       }
+
        /* OK, we've finished with the dynamic journal bits:
         * reinitialise the dynamic contents of the superblock in memory
         * and reset them on disk. */
index 318f76535bd44c888d2a1045f0f4a520d3c5799a..d0b6cd3afb2fb443d5bd4220f94b5bb698131ea9 100644 (file)
@@ -38,7 +38,7 @@ TRACE_EVENT(ext4_free_inode,
                __entry->blocks = inode->i_blocks;
        ),
 
-       TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu",
+       TP_printk("dev %s ino %lu mode 0%o uid %u gid %u blocks %llu",
                  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
                  __entry->mode, __entry->uid, __entry->gid,
                  (unsigned long long) __entry->blocks)
@@ -61,7 +61,7 @@ TRACE_EVENT(ext4_request_inode,
                __entry->mode   = mode;
        ),
 
-       TP_printk("dev %s dir %lu mode %d",
+       TP_printk("dev %s dir %lu mode 0%o",
                  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir,
                  __entry->mode)
 );
@@ -85,7 +85,7 @@ TRACE_EVENT(ext4_allocate_inode,
                __entry->mode   = mode;
        ),
 
-       TP_printk("dev %s ino %lu dir %lu mode %d",
+       TP_printk("dev %s ino %lu dir %lu mode 0%o",
                  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
                  (unsigned long) __entry->dir, __entry->mode)
 );
@@ -305,7 +305,6 @@ TRACE_EVENT(ext4_da_writepages_result,
                __field(        int,    ret                     )
                __field(        int,    pages_written           )
                __field(        long,   pages_skipped           )
-               __field(        char,   encountered_congestion  )
                __field(        char,   more_io                 )       
                __field(        char,   no_nrwrite_index_update )
                __field(       pgoff_t, writeback_index         )
@@ -317,17 +316,16 @@ TRACE_EVENT(ext4_da_writepages_result,
                __entry->ret            = ret;
                __entry->pages_written  = pages_written;
                __entry->pages_skipped  = wbc->pages_skipped;
-               __entry->encountered_congestion = wbc->encountered_congestion;
                __entry->more_io        = wbc->more_io;
                __entry->no_nrwrite_index_update = wbc->no_nrwrite_index_update;
                __entry->writeback_index = inode->i_mapping->writeback_index;
        ),
 
-       TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d writeback_index %lu",
+       TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld more_io %d no_nrwrite_index_update %d writeback_index %lu",
                  jbd2_dev_to_name(__entry->dev),
                  (unsigned long) __entry->ino, __entry->ret,
                  __entry->pages_written, __entry->pages_skipped,
-                 __entry->encountered_congestion, __entry->more_io,
+                 __entry->more_io,
                  __entry->no_nrwrite_index_update,
                  (unsigned long) __entry->writeback_index)
 );
@@ -591,30 +589,32 @@ TRACE_EVENT(ext4_allocate_blocks,
 
 TRACE_EVENT(ext4_free_blocks,
        TP_PROTO(struct inode *inode, __u64 block, unsigned long count,
-                       int metadata),
+                int flags),
 
-       TP_ARGS(inode, block, count, metadata),
+       TP_ARGS(inode, block, count, flags),
 
        TP_STRUCT__entry(
                __field(        dev_t,  dev                     )
                __field(        ino_t,  ino                     )
+               __field(      umode_t, mode                     )
                __field(        __u64,  block                   )
                __field(        unsigned long,  count           )
-               __field(        int,    metadata                )
-
+               __field(         int,   flags                   )
        ),
 
        TP_fast_assign(
                __entry->dev            = inode->i_sb->s_dev;
                __entry->ino            = inode->i_ino;
+               __entry->mode           = inode->i_mode;
                __entry->block          = block;
                __entry->count          = count;
-               __entry->metadata       = metadata;
+               __entry->flags          = flags;
        ),
 
-       TP_printk("dev %s ino %lu block %llu count %lu metadata %d",
+       TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d",
                  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-                 __entry->block, __entry->count, __entry->metadata)
+                 __entry->mode, __entry->block, __entry->count,
+                 __entry->flags)
 );
 
 TRACE_EVENT(ext4_sync_file,
@@ -848,6 +848,32 @@ TRACE_EVENT(ext4_mballoc_free,
                  __entry->result_len, __entry->result_logical)
 );
 
+TRACE_EVENT(ext4_forget,
+       TP_PROTO(struct inode *inode, int is_metadata, __u64 block),
+
+       TP_ARGS(inode, is_metadata, block),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        umode_t, mode                   )
+               __field(        int,    is_metadata             )
+               __field(        __u64,  block                   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->mode   = inode->i_mode;
+               __entry->is_metadata = is_metadata;
+               __entry->block  = block;
+       ),
+
+       TP_printk("dev %s ino %lu mode 0%o is_metadata %d block %llu",
+                 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+                 __entry->mode, __entry->is_metadata, __entry->block)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */