btrfs: use EXPORT_FOR_TESTS for conditionally exported functions

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d228f706ff3e61784e4c78e71a40d923dbefe1d2..4ea808d6cfbcc794c2dbeccde3fa6f6331b1d749 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -89,9 +89,18 @@ void btrfs_leak_debug_check(void)
 static inline void __btrfs_debug_check_extent_io_range(const char *caller,
                struct extent_io_tree *tree, u64 start, u64 end)
 {
-       if (tree->ops && tree->ops->check_extent_io_range)
-               tree->ops->check_extent_io_range(tree->private_data, caller,
-                                                start, end);
+       struct inode *inode = tree->private_data;
+       u64 isize;
+
+       if (!inode || !is_data_inode(inode))
+               return;
+
+       isize = i_size_read(inode);
+       if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
+               btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
+                   "%s: ino %llu isize %llu odd range [%llu,%llu]",
+                       caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
+       }
 }
 #else
 #define btrfs_leak_debug_add(new, head)        do {} while (0)
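
The rewritten debug check relies on is_data_inode() to skip the btree inode, whose io_tree would otherwise trip the range check. A minimal sketch of that helper, assuming the usual objectid test in btrfs_inode.h:

	static inline bool is_data_inode(struct inode *inode)
	{
		/* assumption: only the btree inode uses the reserved objectid */
		return btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID;
	}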
@@ -344,13 +353,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
        return tree_search_for_insert(tree, offset, NULL, NULL);
 }
 
-static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
-                    struct extent_state *other)
-{
-       if (tree->ops && tree->ops->merge_extent_hook)
-               tree->ops->merge_extent_hook(tree->private_data, new, other);
-}
-
 /*
  * utility function to look for merge candidates inside a given range.
  * Any extents with matching state are merged together into a single
@@ -374,7 +376,10 @@ static void merge_state(struct extent_io_tree *tree,
                other = rb_entry(other_node, struct extent_state, rb_node);
                if (other->end == state->start - 1 &&
                    other->state == state->state) {
-                       merge_cb(tree, state, other);
+                       if (tree->private_data &&
+                           is_data_inode(tree->private_data))
+                               btrfs_merge_delalloc_extent(tree->private_data,
+                                                           state, other);
                        state->start = other->start;
                        rb_erase(&other->rb_node, &tree->state);
                        RB_CLEAR_NODE(&other->rb_node);
@@ -386,7 +391,10 @@ static void merge_state(struct extent_io_tree *tree,
                other = rb_entry(other_node, struct extent_state, rb_node);
                if (other->start == state->end + 1 &&
                    other->state == state->state) {
-                       merge_cb(tree, state, other);
+                       if (tree->private_data &&
+                           is_data_inode(tree->private_data))
+                               btrfs_merge_delalloc_extent(tree->private_data,
+                                                           state, other);
                        state->end = other->end;
                        rb_erase(&other->rb_node, &tree->state);
                        RB_CLEAR_NODE(&other->rb_node);
@@ -395,20 +403,6 @@ static void merge_state(struct extent_io_tree *tree,
        }
 }
 
-static void set_state_cb(struct extent_io_tree *tree,
-                        struct extent_state *state, unsigned *bits)
-{
-       if (tree->ops && tree->ops->set_bit_hook)
-               tree->ops->set_bit_hook(tree->private_data, state, bits);
-}
-
-static void clear_state_cb(struct extent_io_tree *tree,
-                          struct extent_state *state, unsigned *bits)
-{
-       if (tree->ops && tree->ops->clear_bit_hook)
-               tree->ops->clear_bit_hook(tree->private_data, state, bits);
-}
-
 static void set_state_bits(struct extent_io_tree *tree,
                           struct extent_state *state, unsigned *bits,
                           struct extent_changeset *changeset);
@@ -451,13 +445,6 @@ static int insert_state(struct extent_io_tree *tree,
        return 0;
 }
 
-static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
-                    u64 split)
-{
-       if (tree->ops && tree->ops->split_extent_hook)
-               tree->ops->split_extent_hook(tree->private_data, orig, split);
-}
-
 /*
  * split a given extent state struct in two, inserting the preallocated
  * struct 'prealloc' as the newly created second half.  'split' indicates an
@@ -477,7 +464,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
 {
        struct rb_node *node;
 
-       split_cb(tree, orig, split);
+       if (tree->private_data && is_data_inode(tree->private_data))
+               btrfs_split_delalloc_extent(tree->private_data, orig, split);
 
        prealloc->start = orig->start;
        prealloc->end = split - 1;
@@ -523,7 +511,10 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
                WARN_ON(range > tree->dirty_bytes);
                tree->dirty_bytes -= range;
        }
-       clear_state_cb(tree, state, bits);
+
+       if (tree->private_data && is_data_inode(tree->private_data))
+               btrfs_clear_delalloc_extent(tree->private_data, state, bits);
+
        ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
        BUG_ON(ret < 0);
        state->state &= ~bits_to_clear;
@@ -800,7 +791,9 @@ static void set_state_bits(struct extent_io_tree *tree,
        unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
        int ret;
 
-       set_state_cb(tree, state, bits);
+       if (tree->private_data && is_data_inode(tree->private_data))
+               btrfs_set_delalloc_extent(tree->private_data, state, bits);
+
        if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
                u64 range = state->end - state->start + 1;
                tree->dirty_bytes += range;
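
The hunks above replace the indirect merge/split/set/clear hooks with direct calls into the delalloc code, each guarded by the same is_data_inode() check on tree->private_data. The declarations these call sites assume would look roughly like this (a sketch; the header they live in is not part of this diff):

	void btrfs_merge_delalloc_extent(struct inode *inode,
					 struct extent_state *new,
					 struct extent_state *other);
	void btrfs_split_delalloc_extent(struct inode *inode,
					 struct extent_state *orig, u64 split);
	void btrfs_set_delalloc_extent(struct inode *inode,
				       struct extent_state *state,
				       unsigned *bits);
	void btrfs_clear_delalloc_extent(struct inode *inode,
					 struct extent_state *state,
					 unsigned *bits);

tree->private_data is a void *, so it converts to the struct inode * parameter without a cast.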
@@ -1563,11 +1556,13 @@ static noinline int lock_delalloc_pages(struct inode *inode,
  *
  * 1 is returned if we find something, 0 if nothing was in the tree
  */
-static noinline_for_stack u64 find_lock_delalloc_range(struct inode *inode,
+EXPORT_FOR_TESTS
+noinline_for_stack u64 find_lock_delalloc_range(struct inode *inode,
                                    struct extent_io_tree *tree,
                                    struct page *locked_page, u64 *start,
-                                   u64 *end, u64 max_bytes)
+                                   u64 *end)
 {
+       u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
        u64 delalloc_start;
        u64 delalloc_end;
        u64 found;
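
EXPORT_FOR_TESTS is what lets the self-tests call find_lock_delalloc_range() directly, making the btrfs_find_lock_delalloc_range() wrapper removed below redundant. A sketch of the macro, assuming it only falls back to static when the sanity tests are compiled out:

	#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
	#define EXPORT_FOR_TESTS
	#else
	#define EXPORT_FOR_TESTS static
	#endif

The function also loses its max_bytes parameter; the limit is now fixed locally at BTRFS_MAX_EXTENT_SIZE.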
@@ -1605,6 +1600,7 @@ again:
        /* step two, lock all the pages after the page that has start */
        ret = lock_delalloc_pages(inode, locked_page,
                                  delalloc_start, delalloc_end);
+       ASSERT(!ret || ret == -EAGAIN);
        if (ret == -EAGAIN) {
                /* some of the pages are gone, lets avoid looping by
                 * shortening the size of the delalloc range we're searching
@@ -1620,7 +1616,6 @@ again:
                        goto out_failed;
                }
        }
-       BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */
 
        /* step three, lock the state bits for the whole range */
        lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
@@ -1643,17 +1638,6 @@ out_failed:
        return found;
 }
 
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-u64 btrfs_find_lock_delalloc_range(struct inode *inode,
-                                   struct extent_io_tree *tree,
-                                   struct page *locked_page, u64 *start,
-                                   u64 *end, u64 max_bytes)
-{
-       return find_lock_delalloc_range(inode, tree, locked_page, start, end,
-                       max_bytes);
-}
-#endif
-
 static int __process_pages_contig(struct address_space *mapping,
                                  struct page *locked_page,
                                  pgoff_t start_index, pgoff_t end_index,
@@ -2412,14 +2396,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 {
        int uptodate = (err == 0);
-       struct extent_io_tree *tree;
        int ret = 0;
 
-       tree = &BTRFS_I(page->mapping->host)->io_tree;
-
-       if (tree->ops && tree->ops->writepage_end_io_hook)
-               tree->ops->writepage_end_io_hook(page, start, end, NULL,
-                               uptodate);
+       btrfs_writepage_endio_finish_ordered(page, start, end, uptodate);
 
        if (!uptodate) {
                ClearPageUptodate(page);
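
With the writepage_end_io_hook indirection gone, end_extent_writepage() calls the ordered-extent completion helper directly, and __extent_writepage_io() below does the same for the COW fixup. The prototypes these call sites assume, as a sketch:

	int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
	void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
						  u64 end, int uptodate);

The old hook's extent_state argument, always NULL at these call sites, is dropped.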
@@ -3205,7 +3184,7 @@ static void update_nr_written(struct writeback_control *wbc,
 /*
  * helper for __extent_writepage, doing all of the delayed allocation setup.
  *
- * This returns 1 if our fill_delalloc function did all the work required
+ * This returns 1 if btrfs_run_delalloc_range did all the work required
  * to write the page (copy into inline extent).  In this case the IO has
  * been started and the page is already unlocked.
  *
@@ -3213,12 +3192,10 @@ static void update_nr_written(struct writeback_control *wbc,
  * This returns < 0 if there were errors (page still locked)
  */
 static noinline_for_stack int writepage_delalloc(struct inode *inode,
-                             struct page *page, struct writeback_control *wbc,
-                             struct extent_page_data *epd,
-                             u64 delalloc_start,
-                             unsigned long *nr_written)
+               struct page *page, struct writeback_control *wbc,
+               u64 delalloc_start, unsigned long *nr_written)
 {
-       struct extent_io_tree *tree = epd->tree;
+       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
        u64 page_end = delalloc_start + PAGE_SIZE - 1;
        u64 nr_delalloc;
        u64 delalloc_to_write = 0;
@@ -3226,31 +3203,26 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
        int ret;
        int page_started = 0;
 
-       if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
-               return 0;
-
        while (delalloc_end < page_end) {
                nr_delalloc = find_lock_delalloc_range(inode, tree,
                                               page,
                                               &delalloc_start,
-                                              &delalloc_end,
-                                              BTRFS_MAX_EXTENT_SIZE);
+                                              &delalloc_end);
                if (nr_delalloc == 0) {
                        delalloc_start = delalloc_end + 1;
                        continue;
                }
-               ret = tree->ops->fill_delalloc(inode, page,
-                                              delalloc_start,
-                                              delalloc_end,
-                                              &page_started,
-                                              nr_written, wbc);
+               ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
+                               delalloc_end, &page_started, nr_written, wbc);
                /* File system has been set read-only */
                if (ret) {
                        SetPageError(page);
-                       /* fill_delalloc should be return < 0 for error
-                        * but just in case, we use > 0 here meaning the
-                        * IO is started, so we don't want to return > 0
-                        * unless things are going well.
+                       /*
+                        * btrfs_run_delalloc_range should return < 0 for error
+                        * but just in case, we use > 0 here meaning the IO is
+                        * started, so we don't want to return > 0 unless
+                        * things are going well.
                         */
                        ret = ret < 0 ? ret : -EIO;
                        goto done;
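
The tree->ops->fill_delalloc indirection is likewise replaced by a direct call. Matching the call site above, btrfs_run_delalloc_range() would be declared roughly as (a sketch):

	int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
				     u64 start, u64 end, int *page_started,
				     unsigned long *nr_written,
				     struct writeback_control *wbc);

The epd->extent_locked early return does not disappear; it moves to the caller, __extent_writepage(), further down in this diff.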
@@ -3323,20 +3295,17 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
        int nr = 0;
        bool compressed;
 
-       if (tree->ops && tree->ops->writepage_start_hook) {
-               ret = tree->ops->writepage_start_hook(page, start,
-                                                     page_end);
-               if (ret) {
-                       /* Fixup worker will requeue */
-                       if (ret == -EBUSY)
-                               wbc->pages_skipped++;
-                       else
-                               redirty_page_for_writepage(wbc, page);
+       ret = btrfs_writepage_cow_fixup(page, start, page_end);
+       if (ret) {
+               /* Fixup worker will requeue */
+               if (ret == -EBUSY)
+                       wbc->pages_skipped++;
+               else
+                       redirty_page_for_writepage(wbc, page);
 
-                       update_nr_written(wbc, nr_written);
-                       unlock_page(page);
-                       return 1;
-               }
+               update_nr_written(wbc, nr_written);
+               unlock_page(page);
+               return 1;
        }
 
        /*
@@ -3347,9 +3316,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 
        end = page_end;
        if (i_size <= start) {
-               if (tree->ops && tree->ops->writepage_end_io_hook)
-                       tree->ops->writepage_end_io_hook(page, start,
-                                                        page_end, NULL, 1);
+               btrfs_writepage_endio_finish_ordered(page, start, page_end, 1);
                goto done;
        }
 
@@ -3360,9 +3327,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
                u64 offset;
 
                if (cur >= i_size) {
-                       if (tree->ops && tree->ops->writepage_end_io_hook)
-                               tree->ops->writepage_end_io_hook(page, cur,
-                                                        page_end, NULL, 1);
+                       btrfs_writepage_endio_finish_ordered(page, cur,
+                                                            page_end, 1);
                        break;
                }
                em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
@@ -3396,11 +3362,10 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
                         * end_io notification does not happen here for
                         * compressed extents
                         */
-                       if (!compressed && tree->ops &&
-                           tree->ops->writepage_end_io_hook)
-                               tree->ops->writepage_end_io_hook(page, cur,
-                                                        cur + iosize - 1,
-                                                        NULL, 1);
+                       if (!compressed)
+                               btrfs_writepage_endio_finish_ordered(page, cur,
+                                                           cur + iosize - 1,
+                                                           1);
                        else if (compressed) {
                                /* we don't want to end_page_writeback on
                                 * a compressed extent.  this happens
@@ -3491,11 +3456,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
        set_page_extent_mapped(page);
 
-       ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
-       if (ret == 1)
-               goto done_unlocked;
-       if (ret)
-               goto done;
+       if (!epd->extent_locked) {
+               ret = writepage_delalloc(inode, page, wbc, start, &nr_written);
+               if (ret == 1)
+                       goto done_unlocked;
+               if (ret)
+                       goto done;
+       }
 
        ret = __extent_writepage_io(inode, page, wbc, epd,
                                    i_size, nr_written, write_flags, &nr);
@@ -3934,12 +3901,25 @@ static int extent_write_cache_pages(struct address_space *mapping,
                        range_whole = 1;
                scanned = 1;
        }
-       if (wbc->sync_mode == WB_SYNC_ALL)
+
+       /*
+        * We do the tagged writepage as long as the snapshot flush bit is set
+        * and we are the first one to do the filemap_flush() on this inode.
+        *
+        * The nr_to_write == LONG_MAX is needed to make sure other flushers do
+        * not race in and drop the bit.
+        */
+       if (range_whole && wbc->nr_to_write == LONG_MAX &&
+           test_and_clear_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
+                              &BTRFS_I(inode)->runtime_flags))
+               wbc->tagged_writepages = 1;
+
+       if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag = PAGECACHE_TAG_TOWRITE;
        else
                tag = PAGECACHE_TAG_DIRTY;
 retry:
-       if (wbc->sync_mode == WB_SYNC_ALL)
+       if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag_pages_for_writeback(mapping, index, end);
        done_index = index;
        while (!done && !nr_to_write_done && (index <= end) &&
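
The tagged writepage path fires only for the first flusher after a snapshot request, because test_and_clear_bit() removes the flag before anyone else sees it. The producer side is outside this file; a sketch of the assumed sequence in the snapshot path:

	/* sketch (assumption): snapshot side marks the inode, then flushes */
	set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &BTRFS_I(inode)->runtime_flags);
	filemap_flush(inode->i_mapping);

Tagging with PAGECACHE_TAG_TOWRITE gives this flush the same livelock protection WB_SYNC_ALL writeback has: pages dirtied during the walk are not picked up again.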
@@ -4084,10 +4064,8 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
                if (clear_page_dirty_for_io(page))
                        ret = __extent_writepage(page, &wbc_writepages, &epd);
                else {
-                       if (tree->ops && tree->ops->writepage_end_io_hook)
-                               tree->ops->writepage_end_io_hook(page, start,
-                                                start + PAGE_SIZE - 1,
-                                                NULL, 1);
+                       btrfs_writepage_endio_finish_ordered(page, start,
+                                                   start + PAGE_SIZE - 1, 1);
                        unlock_page(page);
                }
                put_page(page);
@@ -4914,13 +4892,6 @@ again:
        check_buffer_tree_ref(eb);
        set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
 
-       /*
-        * We will free dummy extent buffer's if they come into
-        * free_extent_buffer with a ref count of 2, but if we are using this we
-        * want the buffers to stay in memory until we're done with them, so
-        * bump the ref count again.
-        */
-       atomic_inc(&eb->refs);
        return eb;
 free_eb:
        btrfs_release_extent_buffer(eb);
@@ -5102,7 +5073,9 @@ void free_extent_buffer(struct extent_buffer *eb)
 
        while (1) {
                refs = atomic_read(&eb->refs);
-               if (refs <= 3)
+               if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
+                   || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
+                       refs == 1))
                        break;
                old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
                if (old == refs)
@@ -5110,10 +5083,6 @@ void free_extent_buffer(struct extent_buffer *eb)
        }
 
        spin_lock(&eb->refs_lock);
-       if (atomic_read(&eb->refs) == 2 &&
-           test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))
-               atomic_dec(&eb->refs);
-
        if (atomic_read(&eb->refs) == 2 &&
            test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
            !extent_buffer_under_io(eb) &&
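
Taken together, the last three hunks mean an UNMAPPED (test-only) extent buffer now lives at a single reference and is torn down by its first free, instead of carrying the artificial extra reference that free_extent_buffer() used to peel off. From a self-test's point of view, roughly (a sketch, assuming the dummy-buffer helper):

	struct extent_buffer *eb;

	eb = alloc_dummy_extent_buffer(fs_info, start);
	if (!eb)
		return -ENOMEM;
	/* ... exercise eb ... */
	free_extent_buffer(eb);	/* last reference, buffer is released */

Mapped buffers keep the old refs <= 3 threshold for dropping into the locked slow path.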