Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

[sfrench/cifs-2.6.git] / fs / ext4 / inode.c
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 2c8caa51addb40c3120d9d3c97d91a559a70814e..5352db1a3086a075f777652b1ac496a1f2c84aaa 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -70,58 +70,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
         return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
  }
  
-/*
- * The ext4 forget function must perform a revoke if we are freeing data
- * which has been journaled.  Metadata (eg. indirect blocks) must be
- * revoked in all cases.
- *
- * "bh" may be NULL: a metadata block may have been freed from memory
- * but there may still be a record of it in the journal, and that record
- * still needs to be revoked.
- *
- * If the handle isn't valid we're not journaling, but we still need to
- * call into ext4_journal_revoke() to put the buffer head.
- */
-int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
-               struct buffer_head *bh, ext4_fsblk_t blocknr)
-{
-       int err;
-
-       might_sleep();
-
-       BUFFER_TRACE(bh, "enter");
-
-       jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
-                 "data mode %x\n",
-                 bh, is_metadata, inode->i_mode,
-                 test_opt(inode->i_sb, DATA_FLAGS));
-
-       /* Never use the revoke function if we are doing full data
-        * journaling: there is no need to, and a V1 superblock won't
-        * support it.  Otherwise, only skip the revoke on un-journaled
-        * data blocks. */
-
-       if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
-           (!is_metadata && !ext4_should_journal_data(inode))) {
-               if (bh) {
-                       BUFFER_TRACE(bh, "call jbd2_journal_forget");
-                       return ext4_journal_forget(handle, bh);
-               }
-               return 0;
-       }
-
-       /*
-        * data!=journal && (is_metadata || should_journal_data(inode))
-        */
-       BUFFER_TRACE(bh, "call ext4_journal_revoke");
-       err = ext4_journal_revoke(handle, blocknr, bh);
-       if (err)
-               ext4_abort(inode->i_sb, __func__,
-                          "error %d when attempting revoke", err);
-       BUFFER_TRACE(bh, "exit");
-       return err;
-}
-
  /*
   * Work out how many blocks we need to proceed with the next chunk of a
   * truncate transaction.
@@ -721,7 +669,7 @@ allocated:
         return ret;
  failed_out:
         for (i = 0; i < index; i++)
-               ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
+               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
         return ret;
  }
  
@@ -817,14 +765,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
         return err;
  failed:
         /* Allocation failed, free what we already allocated */
+       ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
         for (i = 1; i <= n ; i++) {
-               BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
-               ext4_journal_forget(handle, branch[i].bh);
+               /*
+                * branch[i].bh is newly allocated, so there is no
+                * need to revoke the block, which is why we don't
+                * need to set EXT4_FREE_BLOCKS_METADATA.
+                */
+               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+                                EXT4_FREE_BLOCKS_FORGET);
         }
-       for (i = 0; i < indirect_blks; i++)
-               ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
+       for (i = n+1; i < indirect_blks; i++)
+               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
  
-       ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
+       ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
  
         return err;
  }
@@ -903,12 +857,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
  
  err_out:
         for (i = 1; i <= num; i++) {
-               BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
-               ext4_journal_forget(handle, where[i].bh);
-               ext4_free_blocks(handle, inode,
-                                       le32_to_cpu(where[i-1].key), 1, 0);
+               /*
+                * where[i].bh is newly allocated, so there is no
+                * need to revoke the block, which is why we don't
+                * need to set EXT4_FREE_BLOCKS_METADATA.
+                */
+               ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
+                                EXT4_FREE_BLOCKS_FORGET);
         }
-       ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0);
+       ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+                        blks, 0);
  
         return err;
  }
@@ -1021,10 +979,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
         if (!err)
                 err = ext4_splice_branch(handle, inode, iblock,
                                          partial, indirect_blks, count);
-       else
+       if (err)
                 goto cleanup;
  
         set_buffer_new(bh_result);
+
+       ext4_update_inode_fsync_trans(handle, inode, 1);
  got_it:
         map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
         if (count > blocks_to_boundary)
@@ -1052,7 +1012,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode)
                 EXT4_I(inode)->i_reserved_meta_blocks;
         spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
  
-       return total;
+       return (total << inode->i_blkbits);
  }
  /*
   * Calculate the number of metadata blocks need to reserve
@@ -1534,6 +1494,16 @@ static int do_journal_get_write_access(handle_t *handle,
         return ext4_journal_get_write_access(handle, bh);
  }
  
+/*
+ * Truncate blocks that were allocated for a write but ended up unused. The
+ * pagecache from i_size onward is dropped first so its buffers get unmapped.
+ */
+static void ext4_truncate_failed_write(struct inode *inode)
+{
+       truncate_inode_pages(inode->i_mapping, inode->i_size);
+       ext4_truncate(inode);
+}
+
  static int ext4_write_begin(struct file *file, struct address_space *mapping,
                             loff_t pos, unsigned len, unsigned flags,
                             struct page **pagep, void **fsdata)
@@ -1599,7 +1569,7 @@ retry:
  
                 ext4_journal_stop(handle);
                 if (pos + len > inode->i_size) {
-                       ext4_truncate(inode);
+                       ext4_truncate_failed_write(inode);
                         /*
                          * If truncate failed early the inode might
                          * still be on the orphan list; we need to
@@ -1709,7 +1679,7 @@ static int ext4_ordered_write_end(struct file *file,
                 ret = ret2;
  
         if (pos + len > inode->i_size) {
-               ext4_truncate(inode);
+               ext4_truncate_failed_write(inode);
                 /*
                  * If truncate failed early the inode might still be
                  * on the orphan list; we need to make sure the inode
@@ -1751,7 +1721,7 @@ static int ext4_writeback_write_end(struct file *file,
                 ret = ret2;
  
         if (pos + len > inode->i_size) {
-               ext4_truncate(inode);
+               ext4_truncate_failed_write(inode);
                 /*
                  * If truncate failed early the inode might still be
                  * on the orphan list; we need to make sure the inode
@@ -1814,7 +1784,7 @@ static int ext4_journalled_write_end(struct file *file,
         if (!ret)
                 ret = ret2;
         if (pos + len > inode->i_size) {
-               ext4_truncate(inode);
+               ext4_truncate_failed_write(inode);
                 /*
                  * If truncate failed early the inode might still be
                  * on the orphan list; we need to make sure the inode
@@ -2600,7 +2570,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
  }
  
  static int __ext4_journalled_writepage(struct page *page,
-                                      struct writeback_control *wbc,
                                        unsigned int len)
  {
         struct address_space *mapping = page->mapping;
@@ -2758,7 +2727,7 @@ static int ext4_writepage(struct page *page,
                  * doesn't seem much point in redirtying the page here.
                  */
                 ClearPageChecked(page);
-               return __ext4_journalled_writepage(page, wbc, len);
+               return __ext4_journalled_writepage(page, len);
         }
  
         if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -2788,7 +2757,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
          * number of contiguous block. So we will limit
          * number of contiguous block to a sane value
          */
-       if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
+       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
             (max_blocks > EXT4_MAX_TRANS_DATA))
                 max_blocks = EXT4_MAX_TRANS_DATA;
  
@@ -2933,7 +2902,7 @@ retry:
                 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
                                         &mpd);
                 /*
-                * If we have a contigous extent of pages and we
+                * If we have a contiguous extent of pages and we
                  * haven't done the I/O yet, map the blocks and submit
                  * them for I/O.
                  */
@@ -3091,7 +3060,7 @@ retry:
                  * i_size_read because we hold i_mutex.
                  */
                 if (pos + len > inode->i_size)
-                       ext4_truncate(inode);
+                       ext4_truncate_failed_write(inode);
         }
  
         if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -4064,7 +4033,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
         int k, err;
  
         *top = 0;
-       /* Make k index the deepest non-null offest + 1 */
+       /* Make k index the deepest non-null offset + 1 */
         for (k = depth; k > 1 && !offsets[k-1]; k--)
                 ;
         partial = ext4_get_branch(inode, k, offsets, chain, &err);
@@ -4120,6 +4089,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
                               __le32 *last)
  {
         __le32 *p;
+       int     flags = EXT4_FREE_BLOCKS_FORGET;
+
+       if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+               flags |= EXT4_FREE_BLOCKS_METADATA;
+
         if (try_to_extend_transaction(handle, inode)) {
                 if (bh) {
                         BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4134,27 +4108,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
                 }
         }
  
-       /*
-        * Any buffers which are on the journal will be in memory. We
-        * find them on the hash table so jbd2_journal_revoke() will
-        * run jbd2_journal_forget() on them.  We've already detached
-        * each block from the file, so bforget() in
-        * jbd2_journal_forget() should be safe.
-        *
-        * AKPM: turn on bforget in jbd2_journal_forget()!!!
-        */
-       for (p = first; p < last; p++) {
-               u32 nr = le32_to_cpu(*p);
-               if (nr) {
-                       struct buffer_head *tbh;
-
-                       *p = 0;
-                       tbh = sb_find_get_block(inode->i_sb, nr);
-                       ext4_forget(handle, 0, inode, tbh, nr);
-               }
-       }
+       for (p = first; p < last; p++)
+               *p = 0;
  
-       ext4_free_blocks(handle, inode, block_to_free, count, 0);
+       ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
  }
  
  /**
@@ -4342,7 +4299,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                                             blocks_for_truncate(inode));
                         }
  
-                       ext4_free_blocks(handle, inode, nr, 1, 1);
+                       ext4_free_blocks(handle, inode, 0, nr, 1,
+                                        EXT4_FREE_BLOCKS_METADATA);
  
                         if (parent_bh) {
                                 /*
@@ -4781,8 +4739,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
         struct ext4_iloc iloc;
         struct ext4_inode *raw_inode;
         struct ext4_inode_info *ei;
-       struct buffer_head *bh;
         struct inode *inode;
+       journal_t *journal = EXT4_SB(sb)->s_journal;
         long ret;
         int block;
  
@@ -4793,11 +4751,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                 return inode;
  
         ei = EXT4_I(inode);
+       iloc.bh = 0;
  
         ret = __ext4_get_inode_loc(inode, &iloc, 0);
         if (ret < 0)
                 goto bad_inode;
-       bh = iloc.bh;
         raw_inode = ext4_raw_inode(&iloc);
         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
         inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
@@ -4820,7 +4778,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                 if (inode->i_mode == 0 ||
                     !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
                         /* this inode is deleted */
-                       brelse(bh);
                         ret = -ESTALE;
                         goto bad_inode;
                 }
@@ -4848,11 +4805,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                 ei->i_data[block] = raw_inode->i_block[block];
         INIT_LIST_HEAD(&ei->i_orphan);
  
+       /*
+        * Set transaction IDs of transactions that have to be committed
+        * to finish f[data]sync. We set them to currently running transaction
+        * as we cannot be sure that the inode or some of its metadata isn't
+        * part of the transaction - the inode could have been reclaimed and
+        * now it is reread from disk.
+        */
+       if (journal) {
+               transaction_t *transaction;
+               tid_t tid;
+
+               spin_lock(&journal->j_state_lock);
+               if (journal->j_running_transaction)
+                       transaction = journal->j_running_transaction;
+               else
+                       transaction = journal->j_committing_transaction;
+               if (transaction)
+                       tid = transaction->t_tid;
+               else
+                       tid = journal->j_commit_sequence;
+               spin_unlock(&journal->j_state_lock);
+               ei->i_sync_tid = tid;
+               ei->i_datasync_tid = tid;
+       }
+
         if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
                     EXT4_INODE_SIZE(inode->i_sb)) {
-                       brelse(bh);
                         ret = -EIO;
                         goto bad_inode;
                 }
@@ -4884,10 +4865,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
  
         ret = 0;
         if (ei->i_file_acl &&
-           ((ei->i_file_acl <
-             (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
-              EXT4_SB(sb)->s_gdb_count)) ||
-            (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
+           !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
                 ext4_error(sb, __func__,
                            "bad extended attribute block %llu in inode #%lu",
                            ei->i_file_acl, inode->i_ino);
@@ -4905,10 +4883,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                 /* Validate block references which are part of inode */
                 ret = ext4_check_inode_blockref(inode);
         }
-       if (ret) {
-               brelse(bh);
+       if (ret)
                 goto bad_inode;
-       }
  
         if (S_ISREG(inode->i_mode)) {
                 inode->i_op = &ext4_file_inode_operations;
@@ -4936,7 +4912,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                         init_special_inode(inode, inode->i_mode,
                            new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
         } else {
-               brelse(bh);
                 ret = -EIO;
                 ext4_error(inode->i_sb, __func__,
                            "bogus i_mode (%o) for inode=%lu",
@@ -4949,6 +4924,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
         return inode;
  
  bad_inode:
+       brelse(iloc.bh);
         iget_failed(inode);
         return ERR_PTR(ret);
  }
@@ -5108,6 +5084,7 @@ static int ext4_do_update_inode(handle_t *handle,
                 err = rc;
         ei->i_state &= ~EXT4_STATE_NEW;
  
+       ext4_update_inode_fsync_trans(handle, inode, 0);
  out_brelse:
         brelse(bh);
         ext4_std_error(inode->i_sb, err);
@@ -5227,8 +5204,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
  
                 /* (user+group)*(old+new) structure, inode write (sb,
                  * inode block, ? - but truncate inode update has it) */
-               handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
-                                       EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+               handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+                                       EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
                 if (IS_ERR(handle)) {
                         error = PTR_ERR(handle);
                         goto err_out;
@@ -5376,7 +5353,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
   * worse case, the indexs blocks spread over different block groups
   *
   * If datablocks are discontiguous, they are possible to spread over
- * different block groups too. If they are contiugous, with flexbg,
+ * different block groups too. If they are contiguous, with flexbg,
   * they could still across block group boundary.
   *
   * Also account for superblock, inode, quota and xattr blocks
@@ -5452,7 +5429,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
   * Calculate the journal credits for a chunk of data modification.
   *
   * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks.
+ * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
   *
   * journal buffers for data blocks are not included here, as DIO
   * and fallocate do no need to journal data buffers.