btrfs: improve error handling of btrfs_add_link
[sfrench/cifs-2.6.git] / fs / btrfs / inode.c
index 26b8bec7c2dcace79a2efeedf97ab86f2b957721..3d29505971fe23059b1cbb8adc814d2638a463d9 100644 (file)
@@ -110,17 +110,17 @@ static void __endio_write_update_ordered(struct inode *inode,
  * extent_clear_unlock_delalloc() to clear both the bits EXTENT_DO_ACCOUNTING
  * and EXTENT_DELALLOC simultaneously, because that causes the reserved metadata
  * to be released, which we want to happen only when finishing the ordered
- * extent (btrfs_finish_ordered_io()). Also note that the caller of
- * btrfs_run_delalloc_range already does proper cleanup for the first page of
- * the range, that is, it invokes the callback writepage_end_io_hook() for the
- * range of the first page.
+ * extent (btrfs_finish_ordered_io()).
  */
 static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
-                                                const u64 offset,
-                                                const u64 bytes)
+                                                struct page *locked_page,
+                                                u64 offset, u64 bytes)
 {
        unsigned long index = offset >> PAGE_SHIFT;
        unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
+       u64 page_start = page_offset(locked_page);
+       u64 page_end = page_start + PAGE_SIZE - 1;
+
        struct page *page;
 
        while (index <= end_index) {
@@ -131,8 +131,18 @@ static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
                ClearPagePrivate2(page);
                put_page(page);
        }
-       return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
-                                           bytes - PAGE_SIZE, false);
+
+       /*
+        * If the locked page belongs to the delalloc range being instantiated,
+        * skip it: the first page of a range is going to be properly cleaned
+        * up by the caller of btrfs_run_delalloc_range().
+        */
+       if (page_start >= offset && page_end <= (offset + bytes - 1)) {
+               offset += PAGE_SIZE;
+               bytes -= PAGE_SIZE;
+       }
+
+       return __endio_write_update_ordered(inode, offset, bytes, false);
 }
 
 static int btrfs_dirty_inode(struct inode *inode);
@@ -230,7 +240,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
                                     start >> PAGE_SHIFT);
                btrfs_set_file_extent_compression(leaf, ei, 0);
                kaddr = kmap_atomic(page);
-               offset = start & (PAGE_SIZE - 1);
+               offset = offset_in_page(start);
                write_extent_buffer(leaf, kaddr + offset, ptr, size);
                kunmap_atomic(kaddr);
                put_page(page);
@@ -539,8 +549,7 @@ again:
                                           &total_compressed);
 
                if (!ret) {
-                       unsigned long offset = total_compressed &
-                               (PAGE_SIZE - 1);
+                       unsigned long offset = offset_in_page(total_compressed);
                        struct page *page = pages[nr_pages - 1];
                        char *kaddr;
 
@@ -1369,7 +1378,8 @@ next_slot:
                         * Do the same check as in btrfs_cross_ref_exist but
                         * without the unnecessary search.
                         */
-                       if (btrfs_file_extent_generation(leaf, fi) <=
+                       if (!nolock &&
+                           btrfs_file_extent_generation(leaf, fi) <=
                            btrfs_root_last_snapshot(&root->root_item))
                                goto out_check;
                        if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
@@ -1602,7 +1612,8 @@ int btrfs_run_delalloc_range(void *private_data, struct page *locked_page,
                                           write_flags);
        }
        if (ret)
-               btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
+               btrfs_cleanup_ordered_extents(inode, locked_page, start,
+                                             end - start + 1);
        return ret;
 }
 
@@ -1870,16 +1881,21 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
 }
 
 /*
- * Merge bio hook, this must check the chunk tree to make sure we don't create
- * bios that span stripes or chunks
+ * btrfs_bio_fits_in_stripe - Checks whether the size of the given bio will fit
+ * in a chunk's stripe. This function ensures that bios do not span stripes or
+ * chunks.
  *
- * return 1 if page cannot be merged to bio
- * return 0 if page can be merged to bio
+ * @page - The page we are about to add to the bio
+ * @size - size we want to add to the bio
+ * @bio - bio we want to ensure is smaller than a stripe
+ * @bio_flags - flags of the bio
+ *
+ * return 1 if page cannot be added to the bio
+ * return 0 if page can be added to the bio
  * return error otherwise
  */
-int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
-                        size_t size, struct bio *bio,
-                        unsigned long bio_flags)
+int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
+                            unsigned long bio_flags)
 {
        struct inode *inode = page->mapping->host;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2023,7 +2039,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
                              unsigned int extra_bits,
                              struct extent_state **cached_state, int dedupe)
 {
-       WARN_ON((end & (PAGE_SIZE - 1)) == 0);
+       WARN_ON(PAGE_ALIGNED(end));
        return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
                                   extra_bits, cached_state);
 }
@@ -3653,6 +3669,21 @@ cache_index:
         * inode is not a directory, logging its parent unnecessarily.
         */
        BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
+       /*
+        * Similar reasoning applies to last_link_trans: it needs to be set,
+        * otherwise a sequence like the following:
+        *
+        * mkdir A
+        * touch foo
+        * ln foo A/bar
+        * echo 2 > /proc/sys/vm/drop_caches
+        * fsync foo
+        * <power failure>
+        *
+        * would result in link bar and directory A not existing after the power
+        * failure.
+        */
+       BTRFS_I(inode)->last_link_trans = BTRFS_I(inode)->last_trans;
 
        path->slots[0]++;
        if (inode->i_nlink != 1 ||
@@ -4411,31 +4442,6 @@ out:
        return err;
 }
 
-static int truncate_space_check(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root,
-                               u64 bytes_deleted)
-{
-       struct btrfs_fs_info *fs_info = root->fs_info;
-       int ret;
-
-       /*
-        * This is only used to apply pressure to the enospc system, we don't
-        * intend to use this reservation at all.
-        */
-       bytes_deleted = btrfs_csum_bytes_to_leaves(fs_info, bytes_deleted);
-       bytes_deleted *= fs_info->nodesize;
-       ret = btrfs_block_rsv_add(root, &fs_info->trans_block_rsv,
-                                 bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
-       if (!ret) {
-               trace_btrfs_space_reservation(fs_info, "transaction",
-                                             trans->transid,
-                                             bytes_deleted, 1);
-               trans->bytes_reserved += bytes_deleted;
-       }
-       return ret;
-
-}
-
 /*
  * Return this if we need to call truncate_block for the last bit of the
  * truncate.
@@ -4480,7 +4486,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        u64 bytes_deleted = 0;
        bool be_nice = false;
        bool should_throttle = false;
-       bool should_end = false;
 
        BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
 
@@ -4693,15 +4698,7 @@ delete:
                                btrfs_abort_transaction(trans, ret);
                                break;
                        }
-                       if (btrfs_should_throttle_delayed_refs(trans))
-                               btrfs_async_run_delayed_refs(fs_info,
-                                       trans->delayed_ref_updates * 2,
-                                       trans->transid, 0);
                        if (be_nice) {
-                               if (truncate_space_check(trans, root,
-                                                        extent_num_bytes)) {
-                                       should_end = true;
-                               }
                                if (btrfs_should_throttle_delayed_refs(trans))
                                        should_throttle = true;
                        }
@@ -4712,7 +4709,7 @@ delete:
 
                if (path->slots[0] == 0 ||
                    path->slots[0] != pending_del_slot ||
-                   should_throttle || should_end) {
+                   should_throttle) {
                        if (pending_del_nr) {
                                ret = btrfs_del_items(trans, root, path,
                                                pending_del_slot,
@@ -4724,23 +4721,24 @@ delete:
                                pending_del_nr = 0;
                        }
                        btrfs_release_path(path);
-                       if (should_throttle) {
-                               unsigned long updates = trans->delayed_ref_updates;
-                               if (updates) {
-                                       trans->delayed_ref_updates = 0;
-                                       ret = btrfs_run_delayed_refs(trans,
-                                                                  updates * 2);
-                                       if (ret)
-                                               break;
-                               }
-                       }
+
                        /*
-                        * if we failed to refill our space rsv, bail out
-                        * and let the transaction restart
+                        * We can generate a lot of delayed refs, so we need to
+                        * throttle every once in a while and make sure we're
+                        * adding enough space to keep up with the work we are
+                        * generating.  Since we hold a transaction here we
+                        * can't flush, and we don't want to FLUSH_LIMIT because
+                        * we could have generated too many delayed refs to
+                        * actually allocate, so just bail if we're short and
+                        * let the normal reservation dance happen higher up.
                         */
-                       if (should_end) {
-                               ret = -EAGAIN;
-                               break;
+                       if (should_throttle) {
+                               ret = btrfs_delayed_refs_rsv_refill(fs_info,
+                                                       BTRFS_RESERVE_NO_FLUSH);
+                               if (ret) {
+                                       ret = -EAGAIN;
+                                       break;
+                               }
                        }
                        goto search_again;
                } else {
@@ -4766,18 +4764,6 @@ out:
        }
 
        btrfs_free_path(path);
-
-       if (be_nice && bytes_deleted > SZ_32M && (ret >= 0 || ret == -EAGAIN)) {
-               unsigned long updates = trans->delayed_ref_updates;
-               int err;
-
-               if (updates) {
-                       trans->delayed_ref_updates = 0;
-                       err = btrfs_run_delayed_refs(trans, updates * 2);
-                       if (err)
-                               ret = err;
-               }
-       }
        return ret;
 }
 
@@ -5300,8 +5286,8 @@ static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
                 * Try to steal from the global reserve if there is space for
                 * it.
                 */
-               if (!btrfs_check_space_for_delayed_refs(trans) &&
-                   !btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, false))
+               if (!btrfs_check_space_for_delayed_refs(fs_info) &&
+                   !btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0))
                        return trans;
 
                /* If not, commit and try again. */
@@ -6373,14 +6359,19 @@ fail_dir_item:
                err = btrfs_del_root_ref(trans, key.objectid,
                                         root->root_key.objectid, parent_ino,
                                         &local_index, name, name_len);
-
+               if (err)
+                       btrfs_abort_transaction(trans, err);
        } else if (add_backref) {
                u64 local_index;
                int err;
 
                err = btrfs_del_inode_ref(trans, root, name, name_len,
                                          ino, parent_ino, &local_index);
+               if (err)
+                       btrfs_abort_transaction(trans, err);
        }
+
+       /* Return the original error code */
        return ret;
 }
 
@@ -6592,6 +6583,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                        if (err)
                                goto fail;
                }
+               BTRFS_I(inode)->last_link_trans = trans->transid;
                d_instantiate(dentry, inode);
                ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
                                         true, NULL);
@@ -8017,9 +8009,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
 
        dio_bio->bi_status = err;
        dio_end_io(dio_bio);
-
-       if (io_bio->end_io)
-               io_bio->end_io(io_bio, blk_status_to_errno(err));
+       btrfs_io_bio_free_csum(io_bio);
        bio_put(bio);
 }
 
@@ -8372,8 +8362,7 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
        if (!ret)
                return;
 
-       if (io_bio->end_io)
-               io_bio->end_io(io_bio, ret);
+       btrfs_io_bio_free_csum(io_bio);
 
 free_ordered:
        /*
@@ -8876,7 +8865,7 @@ again:
 
        /* page is wholly or partially inside EOF */
        if (page_start + PAGE_SIZE > size)
-               zero_start = size & ~PAGE_MASK;
+               zero_start = offset_in_page(size);
        else
                zero_start = PAGE_SIZE;
 
@@ -9121,6 +9110,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->index_cnt = (u64)-1;
        ei->dir_index = 0;
        ei->last_unlink_trans = 0;
+       ei->last_link_trans = 0;
        ei->last_log_commit = 0;
 
        spin_lock_init(&ei->lock);
@@ -10412,12 +10402,6 @@ out:
        return ret;
 }
 
-__attribute__((const))
-static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror)
-{
-       return -EAGAIN;
-}
-
 void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
 {
        struct inode *inode = tree->private_data;
@@ -10813,7 +10797,6 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
        /* mandatory callbacks */
        .submit_bio_hook = btrfs_submit_bio_hook,
        .readpage_end_io_hook = btrfs_readpage_end_io_hook,
-       .readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
 };
 
 /*