Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
[sfrench/cifs-2.6.git] / fs / ext4 / mballoc.c
index bba12824defad2c8d5cee1baaff488576beda8d7..c1e19d5b5985f2a36d39801e697b9b45f7283f50 100644 (file)
  * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
  * value of s_mb_order2_reqs can be tuned via
  * /sys/fs/ext4/<partition>/mb_order2_req.  If the request len is equal to
- * stripe size (sbi->s_stripe), we try to search for contigous block in
+ * stripe size (sbi->s_stripe), we try to search for contiguous block in
  * stripe size. This should result in better allocation on RAID setups. If
  * not, we search in the specific group using bitmap for best extents. The
  * tunable min_to_scan and max_to_scan control the behaviour here.
@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
        struct ext4_group_info *db;
        int err, count = 0, count2 = 0;
        struct ext4_free_data *entry;
-       ext4_fsblk_t discard_block;
        struct list_head *l, *ltmp;
 
        list_for_each_safe(l, ltmp, &txn->t_private_list) {
@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                        page_cache_release(e4b.bd_bitmap_page);
                }
                ext4_unlock_group(sb, entry->group);
-               discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
-                       + entry->start_blk
-                       + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-               trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
-                                         entry->count);
-               sb_issue_discard(sb, discard_block, entry->count);
-
+               if (test_opt(sb, DISCARD)) {
+                       ext4_fsblk_t discard_block;
+                       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+                       discard_block = (ext4_fsblk_t)entry->group *
+                                               EXT4_BLOCKS_PER_GROUP(sb)
+                                       + entry->start_blk
+                                       + le32_to_cpu(es->s_first_data_block);
+                       trace_ext4_discard_blocks(sb,
+                                       (unsigned long long)discard_block,
+                                       entry->count);
+                       sb_issue_discard(sb, discard_block, entry->count);
+               }
                kmem_cache_free(ext4_free_ext_cachep, entry);
                ext4_mb_release_desc(&e4b);
        }
@@ -3005,6 +3010,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
                trace_ext4_mballoc_prealloc(ac);
 }
 
+/*
+ * Called on failure, while ac->ac_b_ex.fe_len still holds the allocated
+ * length: return those blocks to the inode PA by adding fe_len back to
+ * pa->pa_free.  MB_GROUP_PA needs no such undo because its pa_free is only
+ * changed in ext4_mb_release_context(), and by then fe_len has been zeroed.
+ */
+static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
+{
+       struct ext4_prealloc_space *pa = ac->ac_pa;
+       int len;
+
+       if (pa && pa->pa_type == MB_INODE_PA) {
+               len = ac->ac_b_ex.fe_len;
+               pa->pa_free += len;
+       }
+
+}
+
 /*
  * use blocks preallocated to inode
  */
@@ -4290,6 +4313,7 @@ repeat:
                        ac->ac_status = AC_STATUS_CONTINUE;
                        goto repeat;
                } else if (*errp) {
+                       ext4_discard_allocated_blocks(ac);
                        ac->ac_b_ex.fe_len = 0;
                        ar->len = 0;
                        ext4_mb_show_ac(ac);
@@ -4422,18 +4446,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
        return 0;
 }
 
-/*
- * Main entry point into mballoc to free blocks
+/**
+ * ext4_free_blocks() -- Free given blocks and update quota
+ * @handle:            handle for this transaction
+ * @inode:             inode
+ * @block:             start physical block to free
+ * @count:             number of blocks to free
+ * @flags:             flags controlling the free (EXT4_FREE_BLOCKS_*)
  */
-void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t block, unsigned long count,
-                       int metadata, unsigned long *freed)
+void ext4_free_blocks(handle_t *handle, struct inode *inode,
+                     struct buffer_head *bh, ext4_fsblk_t block,
+                     unsigned long count, int flags)
 {
        struct buffer_head *bitmap_bh = NULL;
        struct super_block *sb = inode->i_sb;
        struct ext4_allocation_context *ac = NULL;
        struct ext4_group_desc *gdp;
        struct ext4_super_block *es;
+       unsigned long freed = 0;
        unsigned int overflow;
        ext4_grpblk_t bit;
        struct buffer_head *gd_bh;
@@ -4443,13 +4473,16 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
        int err = 0;
        int ret;
 
-       *freed = 0;
+       if (bh) {
+               if (block)
+                       BUG_ON(block != bh->b_blocknr);
+               else
+                       block = bh->b_blocknr;
+       }
 
        sbi = EXT4_SB(sb);
        es = EXT4_SB(sb)->s_es;
-       if (block < le32_to_cpu(es->s_first_data_block) ||
-           block + count < block ||
-           block + count > ext4_blocks_count(es)) {
+       if (!ext4_data_block_valid(sbi, block, count)) {
                ext4_error(sb, __func__,
                            "Freeing blocks not in datazone - "
                            "block = %llu, count = %lu", block, count);
@@ -4457,7 +4490,32 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
        }
 
        ext4_debug("freeing block %llu\n", block);
-       trace_ext4_free_blocks(inode, block, count, metadata);
+       trace_ext4_free_blocks(inode, block, count, flags);
+
+       if (flags & EXT4_FREE_BLOCKS_FORGET) {
+               struct buffer_head *tbh = bh;
+               int i;
+
+               BUG_ON(bh && (count > 1));
+
+               for (i = 0; i < count; i++) {
+                       if (!bh)
+                               tbh = sb_find_get_block(inode->i_sb,
+                                                       block + i);
+                       ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 
+                                   inode, tbh, block + i);
+               }
+       }
+
+       /* 
+        * We need to make sure we don't reuse the freed block until
+        * after the transaction is committed, which we can do by
+        * treating the block as metadata, below.  We make an
+        * exception if the inode is to be written in writeback mode
+        * since writeback mode has weak data consistency guarantees.
+        */
+       if (!ext4_should_writeback_data(inode))
+               flags |= EXT4_FREE_BLOCKS_METADATA;
 
        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
        if (ac) {
@@ -4533,7 +4591,8 @@ do_more:
        err = ext4_mb_load_buddy(sb, block_group, &e4b);
        if (err)
                goto error_return;
-       if (metadata && ext4_handle_valid(handle)) {
+
+       if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
                struct ext4_free_data *new_entry;
                /*
                 * blocks being freed are metadata. these blocks shouldn't
@@ -4572,7 +4631,7 @@ do_more:
 
        ext4_mb_release_desc(&e4b);
 
-       *freed += count;
+       freed += count;
 
        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -4592,6 +4651,8 @@ do_more:
        }
        sb->s_dirt = 1;
 error_return:
+       if (freed)
+               vfs_dq_free_block(inode, freed);
        brelse(bitmap_bh);
        ext4_std_error(sb, err);
        if (ac)