Merge branch 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason...
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 11 Jul 2015 17:26:34 +0000 (10:26 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 11 Jul 2015 17:26:34 +0000 (10:26 -0700)
Pull btrfs fixes from Chris Mason:
 "This is an assortment of fixes.  Most of the commits are from Filipe
  (fsync, the inode allocation cache and a few others).  Mark kicked in
  a series fixing corners in the extent sharing ioctls, and everyone
  else fixed up assorted other problems"

* 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: fix wrong check for btrfs_force_chunk_alloc()
  Btrfs: fix warning of bytes_may_use
  Btrfs: fix hang when failing to submit bio of directIO
  Btrfs: fix a comment in inode.c:evict_inode_truncate_pages()
  Btrfs: fix memory corruption on failure to submit bio for direct IO
  btrfs: don't update mtime/ctime on deduped inodes
  btrfs: allow dedupe of same inode
  btrfs: fix deadlock with extent-same and readpage
  btrfs: pass unaligned length to btrfs_cmp_data()
  Btrfs: fix fsync after truncate when no_holes feature is enabled
  Btrfs: fix fsync xattr loss in the fast fsync path
  Btrfs: fix fsync data loss after append write
  Btrfs: fix crash on close_ctree() if cleaner starts new transaction
  Btrfs: fix race between caching kthread and returning inode to inode cache
  Btrfs: use kmem_cache_free when freeing entry in inode cache
  Btrfs: fix race between balance and unused block group deletion
  btrfs: add error handling for scrub_workers_get()
  btrfs: cleanup noused initialization of dev in btrfs_end_bio()
  btrfs: qgroup: allow user to clear the limitation on qgroup

fs/btrfs/disk-io.c
fs/btrfs/volumes.c

diff --combined fs/btrfs/disk-io.c
index 3f43bfea3684a13e378fbdd3d364c0635e14daca,e5aad7f535aabc6344e5788de90b399a9c6bd492..a9aadb2ad5254cfe98d36a6eed1f12d4ae5e7925
@@@ -1744,13 -1744,14 +1744,14 @@@ static void end_workqueue_fn(struct btr
        bio->bi_private = end_io_wq->private;
        bio->bi_end_io = end_io_wq->end_io;
        kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
 -      bio_endio_nodec(bio, error);
 +      bio_endio(bio, error);
  }
  
  static int cleaner_kthread(void *arg)
  {
        struct btrfs_root *root = arg;
        int again;
+       struct btrfs_trans_handle *trans;
  
        do {
                again = 0;
                }
  
                btrfs_run_delayed_iputs(root);
-               btrfs_delete_unused_bgs(root->fs_info);
                again = btrfs_clean_one_deleted_snapshot(root);
                mutex_unlock(&root->fs_info->cleaner_mutex);
  
                 * needn't do anything special here.
                 */
                btrfs_run_defrag_inodes(root->fs_info);
+               /*
+                * Acquires fs_info->delete_unused_bgs_mutex to avoid racing
+                * with relocation (btrfs_relocate_chunk) and relocation
+                * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
+                * after acquiring fs_info->delete_unused_bgs_mutex. So we
+                * can't hold, nor need to, fs_info->cleaner_mutex when deleting
+                * unused block groups.
+                */
+               btrfs_delete_unused_bgs(root->fs_info);
  sleep:
                if (!try_to_freeze() && !again) {
                        set_current_state(TASK_INTERRUPTIBLE);
                        __set_current_state(TASK_RUNNING);
                }
        } while (!kthread_should_stop());
+       /*
+        * Transaction kthread is stopped before us and wakes us up.
+        * However we might have started a new transaction and COWed some
+        * tree blocks when deleting unused block groups for example. So
+        * make sure we commit the transaction we started to have a clean
+        * shutdown when evicting the btree inode - if it has dirty pages
+        * when we do the final iput() on it, eviction will trigger a
+        * writeback for it which will fail with null pointer dereferences
+        * since work queues and other resources were already released and
+        * destroyed by the time the iput/eviction/writeback is made.
+        */
+       trans = btrfs_attach_transaction(root);
+       if (IS_ERR(trans)) {
+               if (PTR_ERR(trans) != -ENOENT)
+                       btrfs_err(root->fs_info,
+                                 "cleaner transaction attach returned %ld",
+                                 PTR_ERR(trans));
+       } else {
+               int ret;
+               ret = btrfs_commit_transaction(trans, root);
+               if (ret)
+                       btrfs_err(root->fs_info,
+                                 "cleaner open transaction commit returned %d",
+                                 ret);
+       }
        return 0;
  }
  
@@@ -2492,6 -2530,7 +2530,7 @@@ int open_ctree(struct super_block *sb
        spin_lock_init(&fs_info->unused_bgs_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
        mutex_init(&fs_info->unused_bg_unpin_mutex);
+       mutex_init(&fs_info->delete_unused_bgs_mutex);
        mutex_init(&fs_info->reloc_mutex);
        mutex_init(&fs_info->delalloc_root_mutex);
        seqlock_init(&fs_info->profiles_lock);
@@@ -3286,8 -3325,11 +3325,8 @@@ static int write_dev_supers(struct btrf
   */
  static void btrfs_end_empty_barrier(struct bio *bio, int err)
  {
 -      if (err) {
 -              if (err == -EOPNOTSUPP)
 -                      set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
 +      if (err)
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
 -      }
        if (bio->bi_private)
                complete(bio->bi_private);
        bio_put(bio);
@@@ -3315,7 -3357,11 +3354,7 @@@ static int write_dev_flush(struct btrfs
  
                wait_for_completion(&device->flush_wait);
  
 -              if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
 -                      printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
 -                                    rcu_str_deref(device->name));
 -                      device->nobarriers = 1;
 -              } else if (!bio_flagged(bio, BIO_UPTODATE)) {
 +              if (!bio_flagged(bio, BIO_UPTODATE)) {
                        ret = -EIO;
                        btrfs_dev_stat_inc_and_print(device,
                                BTRFS_DEV_STAT_FLUSH_ERRS);
diff --combined fs/btrfs/volumes.c
index 4b438b4c8c9195b3aaf11519339719efbef7b21b,9b95503ddd000c6e3495a8261954fc08d480f783..fbe7c104531c9e5eaf8b347f46d0b8a783f88322
@@@ -349,7 -349,7 +349,7 @@@ loop_lock
                    waitqueue_active(&fs_info->async_submit_wait))
                        wake_up(&fs_info->async_submit_wait);
  
 -              BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 +              BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
  
                /*
                 * if we're doing the sync list, record that our
@@@ -2766,6 -2766,20 +2766,20 @@@ static int btrfs_relocate_chunk(struct 
        root = root->fs_info->chunk_root;
        extent_root = root->fs_info->extent_root;
  
+       /*
+        * Prevent races with automatic removal of unused block groups.
+        * After we relocate and before we remove the chunk with offset
+        * chunk_offset, automatic removal of the block group can kick in,
+        * resulting in a failure when calling btrfs_remove_chunk() below.
+        *
+        * Make sure to acquire this mutex before doing a tree search (dev
+        * or chunk trees) to find chunks. Otherwise the cleaner kthread might
+        * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after
+        * we release the path used to search the chunk/dev tree and before
+        * the current task acquires this mutex and calls us.
+        */
+       ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex));
        ret = btrfs_can_relocate(extent_root, chunk_offset);
        if (ret)
                return -ENOSPC;
@@@ -2814,13 -2828,18 +2828,18 @@@ again
        key.type = BTRFS_CHUNK_ITEM_KEY;
  
        while (1) {
+               mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
                ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
-               if (ret < 0)
+               if (ret < 0) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        goto error;
+               }
                BUG_ON(ret == 0); /* Corruption */
  
                ret = btrfs_previous_item(chunk_root, path, key.objectid,
                                          key.type);
+               if (ret)
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                if (ret < 0)
                        goto error;
                if (ret > 0)
                        else
                                BUG_ON(ret);
                }
+               mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
  
                if (found_key.offset == 0)
                        break;
@@@ -3299,9 -3319,12 +3319,12 @@@ again
                        goto error;
                }
  
+               mutex_lock(&fs_info->delete_unused_bgs_mutex);
                ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
-               if (ret < 0)
+               if (ret < 0) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        goto error;
+               }
  
                /*
                 * this shouldn't happen, it means the last relocate
                ret = btrfs_previous_item(chunk_root, path, 0,
                                          BTRFS_CHUNK_ITEM_KEY);
                if (ret) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        ret = 0;
                        break;
                }
                slot = path->slots[0];
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
  
-               if (found_key.objectid != key.objectid)
+               if (found_key.objectid != key.objectid) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        break;
+               }
  
                chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
  
                ret = should_balance_chunk(chunk_root, leaf, chunk,
                                           found_key.offset);
                btrfs_release_path(path);
-               if (!ret)
+               if (!ret) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        goto loop;
+               }
  
                if (counting) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        spin_lock(&fs_info->balance_lock);
                        bctl->stat.expected++;
                        spin_unlock(&fs_info->balance_lock);
                ret = btrfs_relocate_chunk(chunk_root,
                                           found_key.objectid,
                                           found_key.offset);
+               mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                if (ret && ret != -ENOSPC)
                        goto error;
                if (ret == -ENOSPC) {
@@@ -4087,11 -4117,16 +4117,16 @@@ again
        key.type = BTRFS_DEV_EXTENT_KEY;
  
        do {
+               mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-               if (ret < 0)
+               if (ret < 0) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        goto done;
+               }
  
                ret = btrfs_previous_item(root, path, 0, key.type);
+               if (ret)
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                if (ret < 0)
                        goto done;
                if (ret) {
                btrfs_item_key_to_cpu(l, &key, path->slots[0]);
  
                if (key.objectid != device->devid) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        btrfs_release_path(path);
                        break;
                }
                length = btrfs_dev_extent_length(l, dev_extent);
  
                if (key.offset + length <= new_size) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        btrfs_release_path(path);
                        break;
                }
                btrfs_release_path(path);
  
                ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
+               mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                if (ret && ret != -ENOSPC)
                        goto done;
                if (ret == -ENOSPC)
@@@ -5705,17 -5743,16 +5743,16 @@@ int btrfs_rmap_block(struct btrfs_mappi
  
  static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err)
  {
 -      if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED))
 -              bio_endio_nodec(bio, err);
 -      else
 -              bio_endio(bio, err);
 +      bio->bi_private = bbio->private;
 +      bio->bi_end_io = bbio->end_io;
 +      bio_endio(bio, err);
 +
        btrfs_put_bbio(bbio);
  }
  
  static void btrfs_end_bio(struct bio *bio, int err)
  {
        struct btrfs_bio *bbio = bio->bi_private;
-       struct btrfs_device *dev = bbio->stripes[0].dev;
        int is_orig_bio = 0;
  
        if (err) {
                if (err == -EIO || err == -EREMOTEIO) {
                        unsigned int stripe_index =
                                btrfs_io_bio(bio)->stripe_index;
+                       struct btrfs_device *dev;
  
                        BUG_ON(stripe_index >= bbio->num_stripes);
                        dev = bbio->stripes[stripe_index].dev;
                        bio = bbio->orig_bio;
                }
  
 -              bio->bi_private = bbio->private;
 -              bio->bi_end_io = bbio->end_io;
                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                /* only send an error to the higher layers if it is
                 * beyond the tolerance of the btrfs bio
@@@ -5933,6 -5973,8 +5971,6 @@@ static void bbio_error(struct btrfs_bi
                /* Should be the original bio. */
                WARN_ON(bio != bbio->orig_bio);
  
 -              bio->bi_private = bbio->private;
 -              bio->bi_end_io = bbio->end_io;
                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                bio->bi_iter.bi_sector = logical >> 9;
  
@@@ -6013,8 -6055,10 +6051,8 @@@ int btrfs_map_bio(struct btrfs_root *ro
                if (dev_nr < total_devs - 1) {
                        bio = btrfs_bio_clone(first_bio, GFP_NOFS);
                        BUG_ON(!bio); /* -ENOMEM */
 -              } else {
 +              } else
                        bio = first_bio;
 -                      bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED;
 -              }
  
                submit_stripe_bio(root, bbio, bio,
                                  bbio->stripes[dev_nr].physical, dev_nr, rw,