Btrfs: fix fiemap bugs with delalloc
authorChris Mason <chris.mason@oracle.com>
Wed, 23 Feb 2011 21:23:20 +0000 (16:23 -0500)
committerChris Mason <chris.mason@oracle.com>
Wed, 23 Feb 2011 21:23:20 +0000 (16:23 -0500)
The Btrfs fiemap code wasn't properly returning delalloc extents,
so applications that trust fiemap to decide if there are holes in the
file see holes instead of delalloc.

This reworks the btrfs fiemap code, adding a get_extent helper that
searches for delalloc ranges and also adding a helper for extent_fiemap
that skips past holes in the file.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/inode.c

index e7aeba2427018041afb137c26bfaba213de98d98..ff45b80d90f05a5fbbdfe874de8f6cdc2d037afb 100644 (file)
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
  */
 u64 count_range_bits(struct extent_io_tree *tree,
                     u64 *start, u64 search_end, u64 max_bytes,
-                    unsigned long bits)
+                    unsigned long bits, int contig)
 {
        struct rb_node *node;
        struct extent_state *state;
        u64 cur_start = *start;
        u64 total_bytes = 0;
+       u64 last = 0;
        int found = 0;
 
        if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
                state = rb_entry(node, struct extent_state, rb_node);
                if (state->start > search_end)
                        break;
-               if (state->end >= cur_start && (state->state & bits)) {
+               if (contig && found && state->start > last + 1)
+                       break;
+               if (state->end >= cur_start && (state->state & bits) == bits) {
                        total_bytes += min(search_end, state->end) + 1 -
                                       max(cur_start, state->start);
                        if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
                                *start = state->start;
                                found = 1;
                        }
+                       last = state->end;
+               } else if (contig && found) {
+                       break;
                }
                node = rb_next(node);
                if (!node)
@@ -2912,6 +2918,46 @@ out:
        return sector;
 }
 
+/*
+ * helper function for fiemap, which doesn't want to see any holes.
+ * This maps until we find something past 'last'
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+                                               u64 offset,
+                                               u64 last,
+                                               get_extent_t *get_extent)
+{
+       u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+       struct extent_map *em;
+       u64 len;
+
+       if (offset >= last)
+               return NULL;
+
+       while(1) {
+               len = last - offset;
+               if (len == 0)
+                       break;
+               len = (len + sectorsize - 1) & ~(sectorsize - 1);
+               em = get_extent(inode, NULL, 0, offset, len, 0);
+               if (!em || IS_ERR(em))
+                       return em;
+
+               /* if this isn't a hole return it */
+               if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+                   em->block_start != EXTENT_MAP_HOLE) {
+                       return em;
+               }
+
+               /* this is a hole, advance to the next extent */
+               offset = extent_map_end(em);
+               free_extent_map(em);
+               if (offset >= last)
+                       break;
+       }
+       return NULL;
+}
+
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len, get_extent_t *get_extent)
 {
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        u32 flags = 0;
        u32 found_type;
        u64 last;
+       u64 last_for_get_extent = 0;
        u64 disko = 0;
+       u64 isize = i_size_read(inode);
        struct btrfs_key found_key;
        struct extent_map *em = NULL;
        struct extent_state *cached_state = NULL;
        struct btrfs_path *path;
        struct btrfs_file_extent_item *item;
        int end = 0;
-       u64 em_start = 0, em_len = 0;
+       u64 em_start = 0;
+       u64 em_len = 0;
+       u64 em_end = 0;
        unsigned long emflags;
-       int hole = 0;
 
        if (len == 0)
                return -EINVAL;
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                return -ENOMEM;
        path->leave_spinning = 1;
 
+       /*
+        * lookup the last file extent.  We're not using i_size here
+        * because there might be preallocation past i_size
+        */
        ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
                                       path, inode->i_ino, -1, 0);
        if (ret < 0) {
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
        found_type = btrfs_key_type(&found_key);
 
-       /* No extents, just return */
+       /* No extents, but there might be delalloc bits */
        if (found_key.objectid != inode->i_ino ||
            found_type != BTRFS_EXTENT_DATA_KEY) {
-               btrfs_free_path(path);
-               return 0;
+               /* have to trust i_size as the end */
+               last = (u64)-1;
+               last_for_get_extent = isize;
+       } else {
+               /*
+                * remember the start of the last extent.  There are a
+                * bunch of different factors that go into the length of the
+                * extent, so its much less complex to remember where it started
+                */
+               last = found_key.offset;
+               last_for_get_extent = last + 1;
        }
-       last = found_key.offset;
        btrfs_free_path(path);
 
+       /*
+        * we might have some extents allocated but more delalloc past those
+        * extents.  so, we trust isize unless the start of the last extent is
+        * beyond isize
+        */
+       if (last < isize) {
+               last = (u64)-1;
+               last_for_get_extent = isize;
+       }
+
        lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
                         &cached_state, GFP_NOFS);
-       em = get_extent(inode, NULL, 0, off, max - off, 0);
+
+       em = get_extent_skip_holes(inode, off, last_for_get_extent,
+                                  get_extent);
        if (!em)
                goto out;
        if (IS_ERR(em)) {
@@ -2973,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        }
 
        while (!end) {
-               hole = 0;
-               off = em->start + em->len;
+               off = extent_map_end(em);
                if (off >= max)
                        end = 1;
 
-               if (em->block_start == EXTENT_MAP_HOLE) {
-                       hole = 1;
-                       goto next;
-               }
-
                em_start = em->start;
                em_len = em->len;
-
+               em_end = extent_map_end(em);
+               emflags = em->flags;
                disko = 0;
                flags = 0;
 
@@ -3004,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
                        flags |= FIEMAP_EXTENT_ENCODED;
 
-next:
-               emflags = em->flags;
                free_extent_map(em);
                em = NULL;
-               if (!end) {
-                       em = get_extent(inode, NULL, 0, off, max - off, 0);
-                       if (!em)
-                               goto out;
-                       if (IS_ERR(em)) {
-                               ret = PTR_ERR(em);
-                               goto out;
-                       }
-                       emflags = em->flags;
-               }
-
-               if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+               if ((em_start >= last) || em_len == (u64)-1 ||
+                  (last == (u64)-1 && isize <= em_end)) {
                        flags |= FIEMAP_EXTENT_LAST;
                        end = 1;
                }
 
-               if (em_start == last) {
+               /* now scan forward to see if this is really the last extent. */
+               em = get_extent_skip_holes(inode, off, last_for_get_extent,
+                                          get_extent);
+               if (IS_ERR(em)) {
+                       ret = PTR_ERR(em);
+                       goto out;
+               }
+               if (!em) {
                        flags |= FIEMAP_EXTENT_LAST;
                        end = 1;
                }
-
-               if (!hole) {
-                       ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-                                               em_len, flags);
-                       if (ret)
-                               goto out_free;
-               }
+               ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+                                             em_len, flags);
+               if (ret)
+                       goto out_free;
        }
 out_free:
        free_extent_map(em);
index 7083cfafd061abf64f7b0c2252bd91324022b79a..9318dfefd59c50d8d87db399d70c5932e06f8d60 100644 (file)
@@ -191,7 +191,7 @@ void extent_io_exit(void);
 
 u64 count_range_bits(struct extent_io_tree *tree,
                     u64 *start, u64 search_end,
-                    u64 max_bytes, unsigned long bits);
+                    u64 max_bytes, unsigned long bits, int contig);
 
 void free_extent_state(struct extent_state *state);
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
index 8d392ed73d571306456eea06883c91b0f1e66055..44b926646e33912e2a4df629aa76b4fd8ae92c72 100644 (file)
@@ -1913,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
 
        private = 0;
        if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
-                            (u64)-1, 1, EXTENT_DIRTY)) {
+                            (u64)-1, 1, EXTENT_DIRTY, 0)) {
                ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
                                        start, &private_failure);
                if (ret == 0) {
@@ -5282,6 +5282,128 @@ out:
        return em;
 }
 
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+                                          size_t pg_offset, u64 start, u64 len,
+                                          int create)
+{
+       struct extent_map *em;
+       struct extent_map *hole_em = NULL;
+       u64 range_start = start;
+       u64 end;
+       u64 found;
+       u64 found_end;
+       int err = 0;
+
+       em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+       if (IS_ERR(em))
+               return em;
+       if (em) {
+               /*
+                * if our em maps to a hole, there might
+                * actually be delalloc bytes behind it
+                */
+               if (em->block_start != EXTENT_MAP_HOLE)
+                       return em;
+               else
+                       hole_em = em;
+       }
+
+       /* check to see if we've wrapped (len == -1 or similar) */
+       end = start + len;
+       if (end < start)
+               end = (u64)-1;
+       else
+               end -= 1;
+
+       em = NULL;
+
+       /* ok, we didn't find anything, lets look for delalloc */
+       found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+                                end, len, EXTENT_DELALLOC, 1);
+       found_end = range_start + found;
+       if (found_end < range_start)
+               found_end = (u64)-1;
+
+       /*
+        * we didn't find anything useful, return
+        * the original results from get_extent()
+        */
+       if (range_start > end || found_end <= start) {
+               em = hole_em;
+               hole_em = NULL;
+               goto out;
+       }
+
+       /* adjust the range_start to make sure it doesn't
+        * go backwards from the start they passed in
+        */
+       range_start = max(start,range_start);
+       found = found_end - range_start;
+
+       if (found > 0) {
+               u64 hole_start = start;
+               u64 hole_len = len;
+
+               em = alloc_extent_map(GFP_NOFS);
+               if (!em) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               /*
+                * when btrfs_get_extent can't find anything it
+                * returns one huge hole
+                *
+                * make sure what it found really fits our range, and
+                * adjust to make sure it is based on the start from
+                * the caller
+                */
+               if (hole_em) {
+                       u64 calc_end = extent_map_end(hole_em);
+
+                       if (calc_end <= start || (hole_em->start > end)) {
+                               free_extent_map(hole_em);
+                               hole_em = NULL;
+                       } else {
+                               hole_start = max(hole_em->start, start);
+                               hole_len = calc_end - hole_start;
+                       }
+               }
+               em->bdev = NULL;
+               if (hole_em && range_start > hole_start) {
+                       /* our hole starts before our delalloc, so we
+                        * have to return just the parts of the hole
+                        * that go until  the delalloc starts
+                        */
+                       em->len = min(hole_len,
+                                     range_start - hole_start);
+                       em->start = hole_start;
+                       em->orig_start = hole_start;
+                       /*
+                        * don't adjust block start at all,
+                        * it is fixed at EXTENT_MAP_HOLE
+                        */
+                       em->block_start = hole_em->block_start;
+                       em->block_len = hole_len;
+               } else {
+                       em->start = range_start;
+                       em->len = found;
+                       em->orig_start = range_start;
+                       em->block_start = EXTENT_MAP_DELALLOC;
+                       em->block_len = found;
+               }
+       } else if (hole_em) {
+               return hole_em;
+       }
+out:
+
+       free_extent_map(hole_em);
+       if (err) {
+               free_extent_map(em);
+               return ERR_PTR(err);
+       }
+       return em;
+}
+
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                                                  u64 start, u64 len)
 {
@@ -6104,7 +6226,7 @@ out:
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len)
 {
-       return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
+       return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
 }
 
 int btrfs_readpage(struct file *file, struct page *page)