Merge branch 'for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 9 Sep 2017 20:27:51 +0000 (13:27 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 9 Sep 2017 20:27:51 +0000 (13:27 -0700)
Pull btrfs updates from David Sterba:
 "The changes range through all types: cleanups, core changes, sanity
  checks, fixes, other user visible changes, detailed list below:

   - deprecated: user transaction ioctl

   - mount option ssd does not change allocation alignments

   - degraded read-write mount is allowed if all the raid profile
     constraints are met, now based on more accurate check

   - defrag: do not reset compression afterwards; the NOCOMPRESS flag
     can now be overridden by defrag

   - prep work for better extent reference tracking (related to the
     qgroup slowness with balance)

   - prep work for compression heuristics

   - memory allocation reductions (may help latencies on a loaded
     system)

   - better accounting for io waiting states

   - error handling improvements (removed BUGs)

   - added more sanity checks for shared refs

   - fix readdir vs pagefault deadlock under some circumstances

   - fix for 'no-hole' mode, certain combination of compressed and
     inline extents

   - send: fix emission of invalid clone operations

   - fixup file mode if setting acls fail

   - more fixes from fuzzing

   - other cleanups"

* 'for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (104 commits)
  btrfs: submit superblock io with REQ_META and REQ_PRIO
  btrfs: remove unnecessary memory barrier in btrfs_direct_IO
  btrfs: remove superfluous chunk_tree argument from btrfs_alloc_dev_extent
  btrfs: Remove chunk_objectid parameter of btrfs_alloc_dev_extent
  btrfs: pass fs_info to btrfs_del_root instead of tree_root
  Btrfs: add one more sanity check for shared ref type
  Btrfs: remove BUG_ON in __add_tree_block
  Btrfs: remove BUG() in add_data_reference
  Btrfs: remove BUG() in print_extent_item
  Btrfs: remove BUG() in btrfs_extent_inline_ref_size
  Btrfs: convert to use btrfs_get_extent_inline_ref_type
  Btrfs: add a helper to retrive extent inline ref type
  btrfs: scrub: simplify scrub worker initialization
  btrfs: scrub: clean up division in scrub_find_csum
  btrfs: scrub: clean up division in __scrub_mark_bitmap
  btrfs: scrub: use bool for flush_all_writes
  btrfs: preserve i_mode if __btrfs_set_acl() fails
  btrfs: Remove extraneous chunk_objectid variable
  btrfs: Remove chunk_objectid argument from btrfs_make_block_group
  btrfs: Remove extra parentheses from condition in copy_items()
  ...

1  2 
fs/btrfs/check-integrity.c
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/scrub.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index fb07e3c22b9aaa7362861cd3451ee27f184a7d77,7b9aabc2cb785d1789af28a293f7b2331346d06b..7d5a9b51f0d7a81fa7d5adf9900e4fcc744a80c0
@@@ -296,7 -296,8 +296,7 @@@ static void btrfsic_dev_state_hashtable
                struct btrfsic_dev_state *ds,
                struct btrfsic_dev_state_hashtable *h);
  static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
 -static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
 -              struct block_device *bdev,
 +static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
                struct btrfsic_dev_state_hashtable *h);
  static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
  static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
@@@ -384,7 -385,8 +384,7 @@@ static int btrfsic_process_superblock_d
                int superblock_mirror_num,
                struct btrfsic_dev_state **selected_dev_state,
                struct btrfs_super_block *selected_super);
 -static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
 -              struct block_device *bdev);
 +static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev);
  static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
                                           u64 bytenr,
                                           struct btrfsic_dev_state *dev_state,
@@@ -624,15 -626,17 +624,15 @@@ static void btrfsic_dev_state_hashtable
        list_del(&ds->collision_resolving_node);
  }
  
 -static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
 -              struct block_device *bdev,
 +static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
                struct btrfsic_dev_state_hashtable *h)
  {
        const unsigned int hashval =
 -          (((unsigned int)((uintptr_t)bdev)) &
 -           (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
 +              dev & (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1);
        struct btrfsic_dev_state *ds;
  
        list_for_each_entry(ds, h->table + hashval, collision_resolving_node) {
 -              if (ds->bdev == bdev)
 +              if (ds->bdev->bd_dev == dev)
                        return ds;
        }
  
@@@ -664,7 -668,7 +664,7 @@@ static int btrfsic_process_superblock(s
                if (!device->bdev || !device->name)
                        continue;
  
 -              dev_state = btrfsic_dev_state_lookup(device->bdev);
 +              dev_state = btrfsic_dev_state_lookup(device->bdev->bd_dev);
                BUG_ON(NULL == dev_state);
                for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                        ret = btrfsic_process_superblock_dev_mirror(
@@@ -791,12 -795,12 +791,12 @@@ static int btrfsic_process_superblock_d
        dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
        if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
                return -1;
-       bh = __bread(superblock_bdev, dev_bytenr / 4096,
+       bh = __bread(superblock_bdev, dev_bytenr / BTRFS_BDEV_BLOCKSIZE,
                     BTRFS_SUPER_INFO_SIZE);
        if (NULL == bh)
                return -1;
        super_tmp = (struct btrfs_super_block *)
-           (bh->b_data + (dev_bytenr & 4095));
+           (bh->b_data + (dev_bytenr & (BTRFS_BDEV_BLOCKSIZE - 1)));
  
        if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
            btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
@@@ -1552,7 -1556,7 +1552,7 @@@ static int btrfsic_map_block(struct btr
        }
  
        device = multi->stripes[0].dev;
 -      block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
 +      block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev->bd_dev);
        block_ctx_out->dev_bytenr = multi->stripes[0].physical;
        block_ctx_out->start = bytenr;
        block_ctx_out->len = len;
@@@ -1635,7 -1639,7 +1635,7 @@@ static int btrfsic_read_block(struct bt
                unsigned int j;
  
                bio = btrfs_io_bio_alloc(num_pages - i);
 -              bio->bi_bdev = block_ctx->dev->bdev;
 +              bio_set_dev(bio, block_ctx->dev->bdev);
                bio->bi_iter.bi_sector = dev_bytenr >> 9;
                bio_set_op_attrs(bio, REQ_OP_READ, 0);
  
@@@ -1728,7 -1732,7 +1728,7 @@@ static int btrfsic_test_for_metadata(st
        num_pages = state->metablock_size >> PAGE_SHIFT;
        h = (struct btrfs_header *)datav[0];
  
-       if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
+       if (memcmp(h->fsid, fs_info->fsid, BTRFS_FSID_SIZE))
                return 1;
  
        for (i = 0; i < num_pages; i++) {
@@@ -2650,7 -2654,7 +2650,7 @@@ static struct btrfsic_block *btrfsic_bl
                        pr_info("btrfsic: error, kmalloc failed!\n");
                        return NULL;
                }
 -              dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
 +              dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev->bd_dev);
                if (NULL == dev_state) {
                        pr_info("btrfsic: error, lookup dev_state failed!\n");
                        btrfsic_block_free(block);
@@@ -2730,9 -2734,10 +2730,9 @@@ static void btrfsic_cmp_log_and_dev_byt
        }
  }
  
 -static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
 -              struct block_device *bdev)
 +static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev)
  {
 -      return btrfsic_dev_state_hashtable_lookup(bdev,
 +      return btrfsic_dev_state_hashtable_lookup(dev,
                                                  &btrfsic_dev_state_hashtable);
  }
  
@@@ -2746,14 -2751,14 +2746,14 @@@ int btrfsic_submit_bh(int op, int op_fl
        mutex_lock(&btrfsic_mutex);
        /* since btrfsic_submit_bh() might also be called before
         * btrfsic_mount(), this might return NULL */
 -      dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
 +      dev_state = btrfsic_dev_state_lookup(bh->b_bdev->bd_dev);
  
        /* Only called to write the superblock (incl. FLUSH/FUA) */
        if (NULL != dev_state &&
            (op == REQ_OP_WRITE) && bh->b_size > 0) {
                u64 dev_bytenr;
  
-               dev_bytenr = 4096 * bh->b_blocknr;
+               dev_bytenr = BTRFS_BDEV_BLOCKSIZE * bh->b_blocknr;
                if (dev_state->state->print_mask &
                    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
                        pr_info("submit_bh(op=0x%x,0x%x, blocknr=%llu (bytenr %llu), size=%zu, data=%p, bdev=%p)\n",
@@@ -2803,7 -2808,7 +2803,7 @@@ static void __btrfsic_submit_bio(struc
        mutex_lock(&btrfsic_mutex);
        /* since btrfsic_submit_bio() is also called before
         * btrfsic_mount(), this might return NULL */
 -      dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
 +      dev_state = btrfsic_dev_state_lookup(bio_dev(bio));
        if (NULL != dev_state &&
            (bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
                unsigned int i = 0;
                bio_is_patched = 0;
                if (dev_state->state->print_mask &
                    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
 -                      pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
 +                      pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_disk=%p)\n",
                               bio_op(bio), bio->bi_opf, segs,
                               (unsigned long long)bio->bi_iter.bi_sector,
 -                             dev_bytenr, bio->bi_bdev);
 +                             dev_bytenr, bio->bi_disk);
  
                mapped_datav = kmalloc_array(segs,
                                             sizeof(*mapped_datav), GFP_NOFS);
        } else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) {
                if (dev_state->state->print_mask &
                    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
 -                      pr_info("submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n",
 -                             bio_op(bio), bio->bi_opf, bio->bi_bdev);
 +                      pr_info("submit_bio(rw=%d,0x%x FLUSH, disk=%p)\n",
 +                             bio_op(bio), bio->bi_opf, bio->bi_disk);
                if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
                        if ((dev_state->state->print_mask &
                             (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
@@@ -2993,7 -2998,7 +2993,7 @@@ void btrfsic_unmount(struct btrfs_fs_de
                        continue;
  
                ds = btrfsic_dev_state_hashtable_lookup(
 -                              device->bdev,
 +                              device->bdev->bd_dev,
                                &btrfsic_dev_state_hashtable);
                if (NULL != ds) {
                        state = ds->state;
diff --combined fs/btrfs/disk-io.c
index 4f428a48d51359a78c42940509f801734e1050c6,27d45864053661a6393a9de5c461a720181fb863..46329524dd5f46323fb76f47c83c86403c2de6ec
@@@ -529,7 -529,7 +529,7 @@@ static int check_tree_block_fsid(struc
                                 struct extent_buffer *eb)
  {
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
-       u8 fsid[BTRFS_UUID_SIZE];
+       u8 fsid[BTRFS_FSID_SIZE];
        int ret = 1;
  
        read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE);
@@@ -1343,7 -1343,7 +1343,7 @@@ static void __setup_root(struct btrfs_r
        atomic_set(&root->log_batch, 0);
        atomic_set(&root->orphan_inodes, 0);
        refcount_set(&root->refs, 1);
-       atomic_set(&root->will_be_snapshoted, 0);
+       atomic_set(&root->will_be_snapshotted, 0);
        atomic64_set(&root->qgroup_meta_rsv, 0);
        root->log_transid = 0;
        root->log_transid_committed = -1;
@@@ -2694,8 -2694,8 +2694,8 @@@ int open_ctree(struct super_block *sb
        btrfs_init_balance(fs_info);
        btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
  
-       sb->s_blocksize = 4096;
-       sb->s_blocksize_bits = blksize_bits(4096);
+       sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE;
+       sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE);
  
        btrfs_init_btree_inode(fs_info);
  
@@@ -3035,15 -3035,10 +3035,10 @@@ retry_root_backup
                btrfs_err(fs_info, "failed to read block groups: %d", ret);
                goto fail_sysfs;
        }
-       fs_info->num_tolerated_disk_barrier_failures =
-               btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
-       if (fs_info->fs_devices->missing_devices >
-            fs_info->num_tolerated_disk_barrier_failures &&
-           !(sb->s_flags & MS_RDONLY)) {
+       if (!(sb->s_flags & MS_RDONLY) && !btrfs_check_rw_degradable(fs_info)) {
                btrfs_warn(fs_info,
- "missing devices (%llu) exceeds the limit (%d), writeable mount is not allowed",
-                       fs_info->fs_devices->missing_devices,
-                       fs_info->num_tolerated_disk_barrier_failures);
+               "writeable mount is not allowed due to too many missing devices");
                goto fail_sysfs;
        }
  
        if (IS_ERR(fs_info->transaction_kthread))
                goto fail_cleaner;
  
-       if (!btrfs_test_opt(fs_info, SSD) &&
-           !btrfs_test_opt(fs_info, NOSSD) &&
+       if (!btrfs_test_opt(fs_info, NOSSD) &&
            !fs_info->fs_devices->rotating) {
-               btrfs_info(fs_info, "detected SSD devices, enabling SSD mode");
-               btrfs_set_opt(fs_info->mount_opt, SSD);
+               btrfs_set_and_info(fs_info, SSD, "enabling ssd optimizations");
        }
  
        /*
@@@ -3321,7 -3314,7 +3314,7 @@@ int btrfs_read_dev_one_super(struct blo
        if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
                return -EINVAL;
  
-       bh = __bread(bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE);
+       bh = __bread(bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, BTRFS_SUPER_INFO_SIZE);
        /*
         * If we fail to read from the underlying devices, as of now
         * the best option we have is to mark it EIO.
@@@ -3378,19 -3371,17 +3371,17 @@@ struct buffer_head *btrfs_read_dev_supe
  }
  
  /*
-  * this should be called twice, once with wait == 0 and
-  * once with wait == 1.  When wait == 0 is done, all the buffer heads
-  * we write are pinned.
+  * Write superblock @sb to the @device. Do not wait for completion, all the
+  * buffer heads we write are pinned.
   *
-  * They are released when wait == 1 is done.
-  * max_mirrors must be the same for both runs, and it indicates how
-  * many supers on this one device should be written.
+  * Write @max_mirrors copies of the superblock, where 0 means default that fit
+  * the expected device size at commit time. Note that max_mirrors must be
+  * same for write and wait phases.
   *
-  * max_mirrors == 0 means to write them all.
+  * Return number of errors when buffer head is not found or submission fails.
   */
  static int write_dev_supers(struct btrfs_device *device,
-                           struct btrfs_super_block *sb,
-                           int wait, int max_mirrors)
+                           struct btrfs_super_block *sb, int max_mirrors)
  {
        struct buffer_head *bh;
        int i;
                    device->commit_total_bytes)
                        break;
  
-               if (wait) {
-                       bh = __find_get_block(device->bdev, bytenr / 4096,
-                                             BTRFS_SUPER_INFO_SIZE);
-                       if (!bh) {
-                               errors++;
-                               continue;
-                       }
-                       wait_on_buffer(bh);
-                       if (!buffer_uptodate(bh))
-                               errors++;
+               btrfs_set_super_bytenr(sb, bytenr);
  
-                       /* drop our reference */
-                       brelse(bh);
+               crc = ~(u32)0;
+               crc = btrfs_csum_data((const char *)sb + BTRFS_CSUM_SIZE, crc,
+                                     BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+               btrfs_csum_final(crc, sb->csum);
  
-                       /* drop the reference from the wait == 0 run */
-                       brelse(bh);
+               /* One reference for us, and we leave it for the caller */
+               bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
+                             BTRFS_SUPER_INFO_SIZE);
+               if (!bh) {
+                       btrfs_err(device->fs_info,
+                           "couldn't get super buffer head for bytenr %llu",
+                           bytenr);
+                       errors++;
                        continue;
-               } else {
-                       btrfs_set_super_bytenr(sb, bytenr);
-                       crc = ~(u32)0;
-                       crc = btrfs_csum_data((const char *)sb +
-                                             BTRFS_CSUM_SIZE, crc,
-                                             BTRFS_SUPER_INFO_SIZE -
-                                             BTRFS_CSUM_SIZE);
-                       btrfs_csum_final(crc, sb->csum);
-                       /*
-                        * one reference for us, and we leave it for the
-                        * caller
-                        */
-                       bh = __getblk(device->bdev, bytenr / 4096,
-                                     BTRFS_SUPER_INFO_SIZE);
-                       if (!bh) {
-                               btrfs_err(device->fs_info,
-                                   "couldn't get super buffer head for bytenr %llu",
-                                   bytenr);
-                               errors++;
-                               continue;
-                       }
+               }
  
-                       memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
+               memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
  
-                       /* one reference for submit_bh */
-                       get_bh(bh);
+               /* one reference for submit_bh */
+               get_bh(bh);
  
-                       set_buffer_uptodate(bh);
-                       lock_buffer(bh);
-                       bh->b_end_io = btrfs_end_buffer_write_sync;
-                       bh->b_private = device;
-               }
+               set_buffer_uptodate(bh);
+               lock_buffer(bh);
+               bh->b_end_io = btrfs_end_buffer_write_sync;
+               bh->b_private = device;
  
                /*
                 * we fua the first super.  The others we allow
                 */
                if (i == 0) {
                        ret = btrfsic_submit_bh(REQ_OP_WRITE,
-                                               REQ_SYNC | REQ_FUA, bh);
+                               REQ_SYNC | REQ_FUA | REQ_META | REQ_PRIO, bh);
                } else {
-                       ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+                       ret = btrfsic_submit_bh(REQ_OP_WRITE,
+                               REQ_SYNC | REQ_META | REQ_PRIO, bh);
                }
                if (ret)
                        errors++;
        return errors < i ? 0 : -1;
  }
  
+ /*
+  * Wait for write completion of superblocks done by write_dev_supers,
+  * @max_mirrors same for write and wait phases.
+  *
+  * Return number of errors when buffer head is not found or not marked up to
+  * date.
+  */
+ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
+ {
+       struct buffer_head *bh;
+       int i;
+       int errors = 0;
+       u64 bytenr;
+       if (max_mirrors == 0)
+               max_mirrors = BTRFS_SUPER_MIRROR_MAX;
+       for (i = 0; i < max_mirrors; i++) {
+               bytenr = btrfs_sb_offset(i);
+               if (bytenr + BTRFS_SUPER_INFO_SIZE >=
+                   device->commit_total_bytes)
+                       break;
+               bh = __find_get_block(device->bdev,
+                                     bytenr / BTRFS_BDEV_BLOCKSIZE,
+                                     BTRFS_SUPER_INFO_SIZE);
+               if (!bh) {
+                       errors++;
+                       continue;
+               }
+               wait_on_buffer(bh);
+               if (!buffer_uptodate(bh))
+                       errors++;
+               /* drop our reference */
+               brelse(bh);
+               /* drop the reference from the writing run */
+               brelse(bh);
+       }
+       return errors < i ? 0 : -1;
+ }
  /*
   * endio for the write_dev_flush, this will wake anyone waiting
   * for the barrier when it is done
@@@ -3499,12 -3511,12 +3511,12 @@@ static void write_dev_flush(struct btrf
  
        bio_reset(bio);
        bio->bi_end_io = btrfs_end_empty_barrier;
 -      bio->bi_bdev = device->bdev;
 +      bio_set_dev(bio, device->bdev);
        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
        init_completion(&device->flush_wait);
        bio->bi_private = &device->flush_wait;
  
-       submit_bio(bio);
+       btrfsic_submit_bio(bio);
        device->flush_bio_sent = 1;
  }
  
@@@ -3516,7 -3528,7 +3528,7 @@@ static blk_status_t wait_dev_flush(stru
        struct bio *bio = device->flush_bio;
  
        if (!device->flush_bio_sent)
 -              return 0;
 +              return BLK_STS_OK;
  
        device->flush_bio_sent = 0;
        wait_for_completion_io(&device->flush_wait);
        return bio->bi_status;
  }
  
- static int check_barrier_error(struct btrfs_fs_devices *fsdevs)
+ static int check_barrier_error(struct btrfs_fs_info *fs_info)
  {
-       int dev_flush_error = 0;
-       struct btrfs_device *dev;
-       list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
-               if (!dev->bdev || dev->last_flush_error)
-                       dev_flush_error++;
-       }
-       if (dev_flush_error >
-           fsdevs->fs_info->num_tolerated_disk_barrier_failures)
+       if (!btrfs_check_rw_degradable(fs_info))
                return -EIO;
        return 0;
  }
  
@@@ -3563,7 -3565,7 +3565,7 @@@ static int barrier_all_devices(struct b
                        continue;
  
                write_dev_flush(dev);
 -              dev->last_flush_error = 0;
 +              dev->last_flush_error = BLK_STS_OK;
        }
  
        /* wait for all the barriers */
                 * to arrive at the volume status. So error checking
                 * is being pushed to a separate loop.
                 */
-               return check_barrier_error(info->fs_devices);
+               return check_barrier_error(info);
        }
        return 0;
  }
@@@ -3626,60 -3628,6 +3628,6 @@@ int btrfs_get_num_tolerated_disk_barrie
        return min_tolerated;
  }
  
- int btrfs_calc_num_tolerated_disk_barrier_failures(
-       struct btrfs_fs_info *fs_info)
- {
-       struct btrfs_ioctl_space_info space;
-       struct btrfs_space_info *sinfo;
-       u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
-                      BTRFS_BLOCK_GROUP_SYSTEM,
-                      BTRFS_BLOCK_GROUP_METADATA,
-                      BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
-       int i;
-       int c;
-       int num_tolerated_disk_barrier_failures =
-               (int)fs_info->fs_devices->num_devices;
-       for (i = 0; i < ARRAY_SIZE(types); i++) {
-               struct btrfs_space_info *tmp;
-               sinfo = NULL;
-               rcu_read_lock();
-               list_for_each_entry_rcu(tmp, &fs_info->space_info, list) {
-                       if (tmp->flags == types[i]) {
-                               sinfo = tmp;
-                               break;
-                       }
-               }
-               rcu_read_unlock();
-               if (!sinfo)
-                       continue;
-               down_read(&sinfo->groups_sem);
-               for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
-                       u64 flags;
-                       if (list_empty(&sinfo->block_groups[c]))
-                               continue;
-                       btrfs_get_block_group_info(&sinfo->block_groups[c],
-                                                  &space);
-                       if (space.total_bytes == 0 || space.used_bytes == 0)
-                               continue;
-                       flags = space.flags;
-                       num_tolerated_disk_barrier_failures = min(
-                               num_tolerated_disk_barrier_failures,
-                               btrfs_get_num_tolerated_disk_barrier_failures(
-                                       flags));
-               }
-               up_read(&sinfo->groups_sem);
-       }
-       return num_tolerated_disk_barrier_failures;
- }
  int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
  {
        struct list_head *head;
                btrfs_set_stack_device_io_width(dev_item, dev->io_width);
                btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
                memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
-               memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
+               memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_FSID_SIZE);
  
                flags = btrfs_super_flags(sb);
                btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
  
-               ret = write_dev_supers(dev, sb, 0, max_mirrors);
+               ret = write_dev_supers(dev, sb, max_mirrors);
                if (ret)
                        total_errors++;
        }
                if (!dev->in_fs_metadata || !dev->writeable)
                        continue;
  
-               ret = write_dev_supers(dev, sb, 1, max_mirrors);
+               ret = wait_dev_supers(dev, max_mirrors);
                if (ret)
                        total_errors++;
        }
@@@ -3995,7 -3943,6 +3943,6 @@@ void close_ctree(struct btrfs_fs_info *
        __btrfs_free_block_rsv(root->orphan_block_rsv);
        root->orphan_block_rsv = NULL;
  
-       mutex_lock(&fs_info->chunk_mutex);
        while (!list_empty(&fs_info->pinned_chunks)) {
                struct extent_map *em;
  
                list_del_init(&em->list);
                free_extent_map(em);
        }
-       mutex_unlock(&fs_info->chunk_mutex);
  }
  
  int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
@@@ -4053,7 -3999,7 +3999,7 @@@ void btrfs_mark_buffer_dirty(struct ext
                                         fs_info->dirty_metadata_batch);
  #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
-               btrfs_print_leaf(fs_info, buf);
+               btrfs_print_leaf(buf);
                ASSERT(0);
        }
  #endif
@@@ -4173,7 -4119,7 +4119,7 @@@ static int btrfs_check_super_valid(stru
                ret = -EINVAL;
        }
  
-       if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
+       if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
                btrfs_err(fs_info,
                           "dev_item UUID does not match fsid: %pU != %pU",
                           fs_info->fsid, sb->dev_item.fsid);
diff --combined fs/btrfs/extent_io.c
index 42b12a85ab49d929e29742fcb97be67d0f2d45cc,d17783d702287540d73c3943d6fd7957f58cb0f3..0f077c5db58ec8b985a23fcf9a441ff0ecb39400
@@@ -20,7 -20,6 +20,6 @@@
  #include "locking.h"
  #include "rcu-string.h"
  #include "backref.h"
- #include "transaction.h"
  
  static struct kmem_cache *extent_state_cache;
  static struct kmem_cache *extent_buffer_cache;
@@@ -1998,7 -1997,7 +1997,7 @@@ int repair_io_failure(struct btrfs_fs_i
         * read repair operation.
         */
        btrfs_bio_counter_inc_blocked(fs_info);
-       if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num)) {
+       if (btrfs_is_parity_mirror(fs_info, logical, length)) {
                /*
                 * Note that we don't use BTRFS_MAP_WRITE because it's supposed
                 * to update all raid stripes, but here we just want to correct
                bio_put(bio);
                return -EIO;
        }
 -      bio->bi_bdev = dev->bdev;
 +      bio_set_dev(bio, dev->bdev);
        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
        bio_add_page(bio, page, length, pg_offset);
  
@@@ -2335,7 -2334,7 +2334,7 @@@ struct bio *btrfs_create_repair_bio(str
        bio = btrfs_io_bio_alloc(1);
        bio->bi_end_io = endio_func;
        bio->bi_iter.bi_sector = failrec->logical >> 9;
 -      bio->bi_bdev = fs_info->fs_devices->latest_bdev;
 +      bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
        bio->bi_iter.bi_size = 0;
        bio->bi_private = data;
  
@@@ -2675,7 -2674,7 +2674,7 @@@ struct bio *btrfs_bio_alloc(struct bloc
        struct bio *bio;
  
        bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset);
 -      bio->bi_bdev = bdev;
 +      bio_set_dev(bio, bdev);
        bio->bi_iter.bi_sector = first_byte >> 9;
        btrfs_io_bio_init(btrfs_io_bio(bio));
        return bio;
@@@ -2757,7 -2756,10 +2756,10 @@@ static int merge_bio(struct extent_io_t
  
  }
  
- static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
+ /*
+  * @opf:      bio REQ_OP_* and REQ_* flags as one value
+  */
+ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
                              struct writeback_control *wbc,
                              struct page *page, sector_t sector,
                              size_t size, unsigned long offset,
        bio->bi_end_io = end_io_func;
        bio->bi_private = tree;
        bio->bi_write_hint = page->mapping->host->i_write_hint;
-       bio_set_op_attrs(bio, op, op_flags);
+       bio->bi_opf = opf;
        if (wbc) {
                wbc_init_bio(wbc, bio);
                wbc_account_io(wbc, page, page_size);
@@@ -2878,7 -2880,7 +2880,7 @@@ static int __do_readpage(struct extent_
                         get_extent_t *get_extent,
                         struct extent_map **em_cached,
                         struct bio **bio, int mirror_num,
-                        unsigned long *bio_flags, int read_flags,
+                        unsigned long *bio_flags, unsigned int read_flags,
                         u64 *prev_em_start)
  {
        struct inode *inode = page->mapping->host;
                        continue;
                }
  
-               ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL,
+               ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
                                         page, sector, disk_io_size, pg_offset,
                                         bdev, bio,
                                         end_bio_extent_readpage, mirror_num,
@@@ -3164,7 -3166,8 +3166,8 @@@ static int __extent_read_full_page(stru
                                   struct page *page,
                                   get_extent_t *get_extent,
                                   struct bio **bio, int mirror_num,
-                                  unsigned long *bio_flags, int read_flags)
+                                  unsigned long *bio_flags,
+                                  unsigned int read_flags)
  {
        struct inode *inode = page->mapping->host;
        struct btrfs_ordered_extent *ordered;
@@@ -3311,7 -3314,7 +3314,7 @@@ static noinline_for_stack int __extent_
                                 struct extent_page_data *epd,
                                 loff_t i_size,
                                 unsigned long nr_written,
-                                int write_flags, int *nr_ret)
+                                unsigned int write_flags, int *nr_ret)
  {
        struct extent_io_tree *tree = epd->tree;
        u64 start = page_offset(page);
                               page->index, cur, end);
                }
  
-               ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
+               ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
                                         page, sector, iosize, pg_offset,
                                         bdev, &epd->bio,
                                         end_bio_extent_writepage,
@@@ -3465,7 -3468,7 +3468,7 @@@ static int __extent_writepage(struct pa
        size_t pg_offset = 0;
        loff_t i_size = i_size_read(inode);
        unsigned long end_index = i_size >> PAGE_SHIFT;
-       int write_flags = 0;
+       unsigned int write_flags = 0;
        unsigned long nr_written = 0;
  
        if (wbc->sync_mode == WB_SYNC_ALL)
@@@ -3715,7 -3718,7 +3718,7 @@@ static noinline_for_stack int write_one
        unsigned long i, num_pages;
        unsigned long bio_flags = 0;
        unsigned long start, end;
-       int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META;
+       unsigned int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META;
        int ret = 0;
  
        clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
  
                clear_page_dirty_for_io(p);
                set_page_writeback(p);
-               ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
+               ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
                                         p, offset >> 9, PAGE_SIZE, 0, bdev,
                                         &epd->bio,
                                         end_bio_extent_buffer_writepage,
@@@ -4606,24 -4609,11 +4609,11 @@@ int extent_fiemap(struct inode *inode, 
                        flags |= (FIEMAP_EXTENT_DELALLOC |
                                  FIEMAP_EXTENT_UNKNOWN);
                } else if (fieinfo->fi_extents_max) {
-                       struct btrfs_trans_handle *trans;
                        u64 bytenr = em->block_start -
                                (em->start - em->orig_start);
  
                        disko = em->block_start + offset_in_extent;
  
-                       /*
-                        * We need a trans handle to get delayed refs
-                        */
-                       trans = btrfs_join_transaction(root);
-                       /*
-                        * It's OK if we can't start a trans we can still check
-                        * from commit_root
-                        */
-                       if (IS_ERR(trans))
-                               trans = NULL;
                        /*
                         * As btrfs supports shared space, this information
                         * can be exported to userspace tools via
                         * then we're just getting a count and we can skip the
                         * lookup stuff.
                         */
-                       ret = btrfs_check_shared(trans, root->fs_info,
-                                       root->objectid,
-                                       btrfs_ino(BTRFS_I(inode)), bytenr);
-                       if (trans)
-                               btrfs_end_transaction(trans);
+                       ret = btrfs_check_shared(root,
+                                                btrfs_ino(BTRFS_I(inode)),
+                                                bytenr);
                        if (ret < 0)
                                goto out_free;
                        if (ret)
@@@ -5405,9 -5393,8 +5393,8 @@@ unlock_exit
        return ret;
  }
  
- void read_extent_buffer(struct extent_buffer *eb, void *dstv,
-                       unsigned long start,
-                       unsigned long len)
+ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
+                       unsigned long start, unsigned long len)
  {
        size_t cur;
        size_t offset;
        size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
        unsigned long i = (start_offset + start) >> PAGE_SHIFT;
  
-       WARN_ON(start > eb->len);
-       WARN_ON(start + len > eb->start + eb->len);
+       if (start + len > eb->len) {
+               WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
+                    eb->start, eb->len, start, len);
+               memset(dst, 0, len);
+               return;
+       }
  
        offset = (start_offset + start) & (PAGE_SIZE - 1);
  
        }
  }
  
- int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
-                       unsigned long start,
-                       unsigned long len)
+ int read_extent_buffer_to_user(const struct extent_buffer *eb,
+                              void __user *dstv,
+                              unsigned long start, unsigned long len)
  {
        size_t cur;
        size_t offset;
   * return 1 if the item spans two pages.
   * return -EINVAL otherwise.
   */
- int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                              unsigned long min_len, char **map,
-                              unsigned long *map_start,
-                              unsigned long *map_len)
+ int map_private_extent_buffer(const struct extent_buffer *eb,
+                             unsigned long start, unsigned long min_len,
+                             char **map, unsigned long *map_start,
+                             unsigned long *map_len)
  {
        size_t offset = start & (PAGE_SIZE - 1);
        char *kaddr;
        unsigned long end_i = (start_offset + start + min_len - 1) >>
                PAGE_SHIFT;
  
+       if (start + min_len > eb->len) {
+               WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
+                      eb->start, eb->len, start, min_len);
+               return -EINVAL;
+       }
        if (i != end_i)
                return 1;
  
                *map_start = ((u64)i << PAGE_SHIFT) - start_offset;
        }
  
-       if (start + min_len > eb->len) {
-               WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
-                      eb->start, eb->len, start, min_len);
-               return -EINVAL;
-       }
        p = eb->pages[i];
        kaddr = page_address(p);
        *map = kaddr + offset;
        return 0;
  }
  
- int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
-                         unsigned long start,
-                         unsigned long len)
+ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+                        unsigned long start, unsigned long len)
  {
        size_t cur;
        size_t offset;
diff --combined fs/btrfs/inode.c
index 24bcd5cd9cf2fc680cc7ed32fb58cd254bb055b5,d184a46e46c49b0af26867b64a4bc3d5d604161b..17ad018da0a253d8c593c77cea554ca7f234fcba
@@@ -392,20 -392,23 +392,23 @@@ static noinline int add_async_extent(st
        return 0;
  }
  
- static inline int inode_need_compress(struct inode *inode)
+ static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
  {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
  
        /* force compress */
        if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
                return 1;
+       /* defrag ioctl */
+       if (BTRFS_I(inode)->defrag_compress)
+               return 1;
        /* bad compression ratios */
        if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
                return 0;
        if (btrfs_test_opt(fs_info, COMPRESS) ||
            BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
-           BTRFS_I(inode)->force_compress)
-               return 1;
+           BTRFS_I(inode)->prop_compress)
+               return btrfs_compress_heuristic(inode, start, end);
        return 0;
  }
  
@@@ -503,7 -506,7 +506,7 @@@ again
         * inode has not been flagged as nocompress.  This flag can
         * change at any time if we discover bad compression ratios.
         */
-       if (inode_need_compress(inode)) {
+       if (inode_need_compress(inode, start, end)) {
                WARN_ON(pages);
                pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
                if (!pages) {
                        goto cont;
                }
  
-               if (BTRFS_I(inode)->force_compress)
-                       compress_type = BTRFS_I(inode)->force_compress;
+               if (BTRFS_I(inode)->defrag_compress)
+                       compress_type = BTRFS_I(inode)->defrag_compress;
+               else if (BTRFS_I(inode)->prop_compress)
+                       compress_type = BTRFS_I(inode)->prop_compress;
  
                /*
                 * we need to call clear_page_dirty_for_io on each
@@@ -645,7 -650,7 +650,7 @@@ cont
  
                /* flag the file so we don't compress in the future */
                if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
-                   !(BTRFS_I(inode)->force_compress)) {
+                   !(BTRFS_I(inode)->prop_compress)) {
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
                }
        }
@@@ -1381,7 -1386,7 +1386,7 @@@ next_slot
                         * we fall into common COW way.
                         */
                        if (!nolock) {
-                               err = btrfs_start_write_no_snapshoting(root);
+                               err = btrfs_start_write_no_snapshotting(root);
                                if (!err)
                                        goto out_check;
                        }
                        if (csum_exist_in_range(fs_info, disk_bytenr,
                                                num_bytes)) {
                                if (!nolock)
-                                       btrfs_end_write_no_snapshoting(root);
+                                       btrfs_end_write_no_snapshotting(root);
                                goto out_check;
                        }
                        if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
                                if (!nolock)
-                                       btrfs_end_write_no_snapshoting(root);
+                                       btrfs_end_write_no_snapshotting(root);
                                goto out_check;
                        }
                        nocow = 1;
@@@ -1415,7 -1420,7 +1420,7 @@@ out_check
                if (extent_end <= start) {
                        path->slots[0]++;
                        if (!nolock && nocow)
-                               btrfs_end_write_no_snapshoting(root);
+                               btrfs_end_write_no_snapshotting(root);
                        if (nocow)
                                btrfs_dec_nocow_writers(fs_info, disk_bytenr);
                        goto next_slot;
                                             NULL);
                        if (ret) {
                                if (!nolock && nocow)
-                                       btrfs_end_write_no_snapshoting(root);
+                                       btrfs_end_write_no_snapshotting(root);
                                if (nocow)
                                        btrfs_dec_nocow_writers(fs_info,
                                                                disk_bytenr);
                                          BTRFS_ORDERED_PREALLOC);
                        if (IS_ERR(em)) {
                                if (!nolock && nocow)
-                                       btrfs_end_write_no_snapshoting(root);
+                                       btrfs_end_write_no_snapshotting(root);
                                if (nocow)
                                        btrfs_dec_nocow_writers(fs_info,
                                                                disk_bytenr);
                                             PAGE_UNLOCK | PAGE_SET_PRIVATE2);
  
                if (!nolock && nocow)
-                       btrfs_end_write_no_snapshoting(root);
+                       btrfs_end_write_no_snapshotting(root);
                cur_offset = extent_end;
  
                /*
@@@ -1576,7 -1581,7 +1581,7 @@@ static int run_delalloc_range(void *pri
        } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 0, nr_written);
-       } else if (!inode_need_compress(inode)) {
+       } else if (!inode_need_compress(inode, start, end)) {
                ret = cow_file_range(inode, locked_page, start, end, end,
                                      page_started, nr_written, 1, NULL);
        } else {
@@@ -1796,10 -1801,11 +1801,11 @@@ static void btrfs_clear_bit_hook(void *
        u64 len = state->end + 1 - state->start;
        u32 num_extents = count_max_extents(len);
  
-       spin_lock(&inode->lock);
-       if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
+       if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
+               spin_lock(&inode->lock);
                inode->defrag_bytes -= len;
-       spin_unlock(&inode->lock);
+               spin_unlock(&inode->lock);
+       }
  
        /*
         * set_bit and clear bit hooks normally require _irqsave/restore
@@@ -3159,8 -3165,6 +3165,6 @@@ zeroit
        memset(kaddr + pgoff, 1, len);
        flush_dcache_page(page);
        kunmap_atomic(kaddr);
-       if (csum_expected == 0)
-               return 0;
        return -EIO;
  }
  
@@@ -5055,7 -5059,7 +5059,7 @@@ static int btrfs_setsize(struct inode *
  
        if (newsize > oldsize) {
                /*
-                * Don't do an expanding truncate while snapshoting is ongoing.
+                * Don't do an expanding truncate while snapshotting is ongoing.
                 * This is to ensure the snapshot captures a fully consistent
                 * state of this file - if the snapshot captures this expanding
                 * truncation, it must capture all writes that happened before
                btrfs_wait_for_snapshot_creation(root);
                ret = btrfs_cont_expand(inode, oldsize, newsize);
                if (ret) {
-                       btrfs_end_write_no_snapshoting(root);
+                       btrfs_end_write_no_snapshotting(root);
                        return ret;
                }
  
                trans = btrfs_start_transaction(root, 1);
                if (IS_ERR(trans)) {
-                       btrfs_end_write_no_snapshoting(root);
+                       btrfs_end_write_no_snapshotting(root);
                        return PTR_ERR(trans);
                }
  
                btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
                pagecache_isize_extended(inode, oldsize, newsize);
                ret = btrfs_update_inode(trans, root, inode);
-               btrfs_end_write_no_snapshoting(root);
+               btrfs_end_write_no_snapshotting(root);
                btrfs_end_transaction(trans);
        } else {
  
@@@ -5873,25 -5877,74 +5877,74 @@@ unsigned char btrfs_filetype_table[] = 
        DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
  };
  
+ /*
+  * All this infrastructure exists because dir_emit can fault, and we are holding
+  * the tree lock when doing readdir.  For now just allocate a buffer and copy
+  * our information into that, and then dir_emit from the buffer.  This is
+  * similar to what NFS does, only we don't keep the buffer around in pagecache
+  * because I'm afraid I'll mess that up.  Long term we need to make filldir do
+  * copy_to_user_inatomic so we don't have to worry about page faulting under the
+  * tree lock.
+  */
+ static int btrfs_opendir(struct inode *inode, struct file *file)
+ {
+       struct btrfs_file_private *private;
+       private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
+       if (!private)
+               return -ENOMEM;
+       private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!private->filldir_buf) {
+               kfree(private);
+               return -ENOMEM;
+       }
+       file->private_data = private;
+       return 0;
+ }
+ struct dir_entry {
+       u64 ino;
+       u64 offset;
+       unsigned type;
+       int name_len;
+ };
+ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
+ {
+       while (entries--) {
+               struct dir_entry *entry = addr;
+               char *name = (char *)(entry + 1);
+               ctx->pos = entry->offset;
+               if (!dir_emit(ctx, name, entry->name_len, entry->ino,
+                             entry->type))
+                       return 1;
+               addr += sizeof(struct dir_entry) + entry->name_len;
+               ctx->pos++;
+       }
+       return 0;
+ }
  static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
  {
        struct inode *inode = file_inode(file);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_file_private *private = file->private_data;
        struct btrfs_dir_item *di;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_path *path;
+       void *addr;
        struct list_head ins_list;
        struct list_head del_list;
        int ret;
        struct extent_buffer *leaf;
        int slot;
-       unsigned char d_type;
-       int over = 0;
-       char tmp_name[32];
        char *name_ptr;
        int name_len;
+       int entries = 0;
+       int total_len = 0;
        bool put = false;
        struct btrfs_key location;
  
        if (!path)
                return -ENOMEM;
  
+       addr = private->filldir_buf;
        path->reada = READA_FORWARD;
  
        INIT_LIST_HEAD(&ins_list);
        INIT_LIST_HEAD(&del_list);
        put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
  
+ again:
        key.type = BTRFS_DIR_INDEX_KEY;
        key.offset = ctx->pos;
        key.objectid = btrfs_ino(BTRFS_I(inode));
                goto err;
  
        while (1) {
+               struct dir_entry *entry;
                leaf = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(leaf)) {
                        goto next;
                if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
                        goto next;
-               ctx->pos = found_key.offset;
                di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
                if (verify_dir_item(fs_info, leaf, slot, di))
                        goto next;
  
                name_len = btrfs_dir_name_len(leaf, di);
-               if (name_len <= sizeof(tmp_name)) {
-                       name_ptr = tmp_name;
-               } else {
-                       name_ptr = kmalloc(name_len, GFP_KERNEL);
-                       if (!name_ptr) {
-                               ret = -ENOMEM;
-                               goto err;
-                       }
+               if ((total_len + sizeof(struct dir_entry) + name_len) >=
+                   PAGE_SIZE) {
+                       btrfs_release_path(path);
+                       ret = btrfs_filldir(private->filldir_buf, entries, ctx);
+                       if (ret)
+                               goto nopos;
+                       addr = private->filldir_buf;
+                       entries = 0;
+                       total_len = 0;
+                       goto again;
                }
+               entry = addr;
+               entry->name_len = name_len;
+               name_ptr = (char *)(entry + 1);
                read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
                                   name_len);
-               d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
+               entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
                btrfs_dir_item_key_to_cpu(leaf, di, &location);
-               over = !dir_emit(ctx, name_ptr, name_len, location.objectid,
-                                d_type);
-               if (name_ptr != tmp_name)
-                       kfree(name_ptr);
-               if (over)
-                       goto nopos;
-               ctx->pos++;
+               entry->ino = location.objectid;
+               entry->offset = found_key.offset;
+               entries++;
+               addr += sizeof(struct dir_entry) + name_len;
+               total_len += sizeof(struct dir_entry) + name_len;
  next:
                path->slots[0]++;
        }
+       btrfs_release_path(path);
+       ret = btrfs_filldir(private->filldir_buf, entries, ctx);
+       if (ret)
+               goto nopos;
  
        ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
        if (ret)
@@@ -6185,6 -6244,37 +6244,37 @@@ static int btrfs_insert_inode_locked(st
                   btrfs_find_actor, &args);
  }
  
+ /*
+  * Inherit flags from the parent inode.
+  *
+  * Currently only the compression flags and the cow flags are inherited.
+  */
+ static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
+ {
+       unsigned int flags;
+       if (!dir)
+               return;
+       flags = BTRFS_I(dir)->flags;
+       if (flags & BTRFS_INODE_NOCOMPRESS) {
+               BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
+               BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+       } else if (flags & BTRFS_INODE_COMPRESS) {
+               BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
+               BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
+       }
+       if (flags & BTRFS_INODE_NODATACOW) {
+               BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
+               if (S_ISREG(inode->i_mode))
+                       BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
+       }
+       btrfs_update_iflags(inode);
+ }
  static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct inode *dir,
        return ret;
  }
  
 -static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
 -                                      int mirror_num)
 +static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
 +                                               struct bio *bio,
 +                                               int mirror_num)
  {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 -      int ret;
 +      blk_status_t ret;
  
        BUG_ON(bio_op(bio) == REQ_OP_WRITE);
  
@@@ -7981,32 -8070,31 +8071,32 @@@ static int btrfs_check_dio_repairable(s
        return 1;
  }
  
 -static int dio_read_error(struct inode *inode, struct bio *failed_bio,
 -                      struct page *page, unsigned int pgoff,
 -                      u64 start, u64 end, int failed_mirror,
 -                      bio_end_io_t *repair_endio, void *repair_arg)
 +static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
 +                                 struct page *page, unsigned int pgoff,
 +                                 u64 start, u64 end, int failed_mirror,
 +                                 bio_end_io_t *repair_endio, void *repair_arg)
  {
        struct io_failure_record *failrec;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
        struct bio *bio;
        int isector;
-       int read_mode = 0;
+       unsigned int read_mode = 0;
        int segs;
        int ret;
 +      blk_status_t status;
  
        BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
  
        ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
        if (ret)
 -              return ret;
 +              return errno_to_blk_status(ret);
  
        ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
                                         failed_mirror);
        if (!ret) {
                free_io_failure(failure_tree, io_tree, failrec);
 -              return -EIO;
 +              return BLK_STS_IOERR;
        }
  
        segs = bio_segments(failed_bio);
        bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
  
        btrfs_debug(BTRFS_I(inode)->root->fs_info,
-                   "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n",
+                   "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
                    read_mode, failrec->this_mirror, failrec->in_validation);
  
 -      ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
 -      if (ret) {
 +      status = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
 +      if (status) {
                free_io_failure(failure_tree, io_tree, failrec);
                bio_put(bio);
        }
  
 -      return ret;
 +      return status;
  }
  
  struct btrfs_retry_complete {
@@@ -8067,8 -8155,8 +8157,8 @@@ end
        bio_put(bio);
  }
  
 -static int __btrfs_correct_data_nocsum(struct inode *inode,
 -                                     struct btrfs_io_bio *io_bio)
 +static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode,
 +                                              struct btrfs_io_bio *io_bio)
  {
        struct btrfs_fs_info *fs_info;
        struct bio_vec bvec;
        unsigned int pgoff;
        u32 sectorsize;
        int nr_sectors;
 -      int ret;
 -      int err = 0;
 +      blk_status_t ret;
 +      blk_status_t err = BLK_STS_OK;
  
        fs_info = BTRFS_I(inode)->root->fs_info;
        sectorsize = fs_info->sectorsize;
@@@ -8106,7 -8194,7 +8196,7 @@@ next_block_or_try_again
                        goto next;
                }
  
-               wait_for_completion(&done.done);
+               wait_for_completion_io(&done.done);
  
                if (!done.uptodate) {
                        /* We might have another mirror, so try again */
@@@ -8185,12 -8273,11 +8275,12 @@@ static blk_status_t __btrfs_subio_endio
        int csum_pos;
        bool uptodate = (err == 0);
        int ret;
 +      blk_status_t status;
  
        fs_info = BTRFS_I(inode)->root->fs_info;
        sectorsize = fs_info->sectorsize;
  
 -      err = 0;
 +      err = BLK_STS_OK;
        start = io_bio->logical;
        done.inode = inode;
        io_bio->bio.bi_iter = io_bio->iter;
@@@ -8212,16 -8299,16 +8302,16 @@@ try_again
                done.start = start;
                init_completion(&done.done);
  
 -              ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
 -                              pgoff, start, start + sectorsize - 1,
 -                              io_bio->mirror_num,
 -                              btrfs_retry_endio, &done);
 -              if (ret) {
 -                      err = errno_to_blk_status(ret);
 +              status = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
 +                                      pgoff, start, start + sectorsize - 1,
 +                                      io_bio->mirror_num, btrfs_retry_endio,
 +                                      &done);
 +              if (status) {
 +                      err = status;
                        goto next;
                }
  
-               wait_for_completion(&done.done);
+               wait_for_completion_io(&done.done);
  
                if (!done.uptodate) {
                        /* We might have another mirror, so try again */
@@@ -8253,7 -8340,7 +8343,7 @@@ static blk_status_t btrfs_subio_endio_r
                if (unlikely(err))
                        return __btrfs_correct_data_nocsum(inode, io_bio);
                else
 -                      return 0;
 +                      return BLK_STS_OK;
        } else {
                return __btrfs_subio_endio_read(inode, io_bio, err);
        }
@@@ -8426,9 -8513,8 +8516,9 @@@ static inline blk_status_t btrfs_lookup
        return 0;
  }
  
 -static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 -                                       u64 file_offset, int async_submit)
 +static inline blk_status_t
 +__btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
-                      int skip_sum, int async_submit)
++                     int async_submit)
  {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_dio_private *dip = bio->bi_private;
                        goto err;
        }
  
-       if (skip_sum)
+       if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
                goto map;
  
        if (write && async_submit) {
@@@ -8476,8 -8562,7 +8566,7 @@@ err
        return ret;
  }
  
- static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
-                                   int skip_sum)
+ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
  {
        struct inode *inode = dip->inode;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        int clone_offset = 0;
        int clone_len;
        int ret;
 +      blk_status_t status;
  
        map_length = orig_bio->bi_iter.bi_size;
        submit_len = map_length;
                 */
                atomic_inc(&dip->pending_bios);
  
-               status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
 -              ret = __btrfs_submit_dio_bio(bio, inode, file_offset,
 -                                           async_submit);
 -              if (ret) {
++              status = __btrfs_submit_dio_bio(bio, inode, file_offset,
 +                                              async_submit);
 +              if (status) {
                        bio_put(bio);
                        atomic_dec(&dip->pending_bios);
                        goto out_err;
        } while (submit_len > 0);
  
  submit:
-       status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
-                                       async_submit);
 -      ret = __btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
 -      if (!ret)
++      status = __btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
 +      if (!status)
                return 0;
  
        bio_put(bio);
@@@ -8587,12 -8670,9 +8675,9 @@@ static void btrfs_submit_direct(struct 
        struct btrfs_dio_private *dip = NULL;
        struct bio *bio = NULL;
        struct btrfs_io_bio *io_bio;
-       int skip_sum;
        bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
        int ret = 0;
  
-       skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
        bio = btrfs_bio_clone(dio_bio);
  
        dip = kzalloc(sizeof(*dip), GFP_NOFS);
                        dio_data->unsubmitted_oe_range_end;
        }
  
-       ret = btrfs_submit_direct_hook(dip, skip_sum);
+       ret = btrfs_submit_direct_hook(dip);
        if (!ret)
                return;
  
@@@ -8735,7 -8815,6 +8820,6 @@@ static ssize_t btrfs_direct_IO(struct k
                return 0;
  
        inode_dio_begin(inode);
-       smp_mb__after_atomic();
  
        /*
         * The generic stuff only does filemap_write_and_wait_range, which
@@@ -9408,7 -9487,8 +9492,8 @@@ struct inode *btrfs_alloc_inode(struct 
        ei->reserved_extents = 0;
  
        ei->runtime_flags = 0;
-       ei->force_compress = BTRFS_COMPRESS_NONE;
+       ei->prop_compress = BTRFS_COMPRESS_NONE;
+       ei->defrag_compress = BTRFS_COMPRESS_NONE;
  
        ei->delayed_node = NULL;
  
@@@ -10748,6 -10828,7 +10833,7 @@@ static const struct file_operations btr
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .iterate_shared = btrfs_real_readdir,
+       .open           = btrfs_opendir,
        .unlocked_ioctl = btrfs_ioctl,
  #ifdef CONFIG_COMPAT
        .compat_ioctl   = btrfs_compat_ioctl,
diff --combined fs/btrfs/scrub.c
index b0b71e8e4c36d23b50133f4c10073bfcccefcc41,7c9c82bb62550877f4dc0d472c95ff9f98c89bd5..e3f6c49e5c4d8aad323b77f72ed6e12da26bee72
@@@ -182,8 -182,8 +182,8 @@@ struct scrub_ctx 
        struct scrub_bio        *wr_curr_bio;
        struct mutex            wr_lock;
        int                     pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
-       atomic_t                flush_all_writes;
        struct btrfs_device     *wr_tgtdev;
+       bool                    flush_all_writes;
  
        /*
         * statistics
@@@ -717,7 -717,7 +717,7 @@@ struct scrub_ctx *scrub_setup_ctx(struc
                WARN_ON(!fs_info->dev_replace.tgtdev);
                sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
                sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
-               atomic_set(&sctx->flush_all_writes, 0);
+               sctx->flush_all_writes = false;
        }
  
        return sctx;
@@@ -1704,7 -1704,7 +1704,7 @@@ static int scrub_submit_raid56_bio_wait
        if (ret)
                return ret;
  
-       wait_for_completion(&done.event);
+       wait_for_completion_io(&done.event);
        if (done.status)
                return -EIO;
  
@@@ -1738,7 -1738,7 +1738,7 @@@ static void scrub_recheck_block(struct 
  
                WARN_ON(!page->page);
                bio = btrfs_io_bio_alloc(1);
 -              bio->bi_bdev = page->dev->bdev;
 +              bio_set_dev(bio, page->dev->bdev);
  
                bio_add_page(bio, page->page, PAGE_SIZE, 0);
                if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
@@@ -1769,7 -1769,7 +1769,7 @@@ static inline int scrub_check_fsid(u8 f
        struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
        int ret;
  
-       ret = memcmp(fsid, fs_devices->fsid, BTRFS_UUID_SIZE);
+       ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
        return !ret;
  }
  
@@@ -1826,7 -1826,7 +1826,7 @@@ static int scrub_repair_page_from_good_
                }
  
                bio = btrfs_io_bio_alloc(1);
 -              bio->bi_bdev = page_bad->dev->bdev;
 +              bio_set_dev(bio, page_bad->dev->bdev);
                bio->bi_iter.bi_sector = page_bad->physical >> 9;
                bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
  
@@@ -1921,7 -1921,7 +1921,7 @@@ again
  
                bio->bi_private = sbio;
                bio->bi_end_io = scrub_wr_bio_end_io;
 -              bio->bi_bdev = sbio->dev->bdev;
 +              bio_set_dev(bio, sbio->dev->bdev);
                bio->bi_iter.bi_sector = sbio->physical >> 9;
                bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
                sbio->status = 0;
@@@ -1964,7 -1964,7 +1964,7 @@@ static void scrub_wr_submit(struct scru
  
        sbio = sctx->wr_curr_bio;
        sctx->wr_curr_bio = NULL;
 -      WARN_ON(!sbio->bio->bi_bdev);
 +      WARN_ON(!sbio->bio->bi_disk);
        scrub_pending_bio_inc(sctx);
        /* process all writes in a single worker thread. Then the block layer
         * orders the requests before sending them to the driver which
@@@ -2321,7 -2321,7 +2321,7 @@@ again
  
                bio->bi_private = sbio;
                bio->bi_end_io = scrub_bio_end_io;
 -              bio->bi_bdev = sbio->dev->bdev;
 +              bio_set_dev(bio, sbio->dev->bdev);
                bio->bi_iter.bi_sector = sbio->physical >> 9;
                bio_set_op_attrs(bio, REQ_OP_READ, 0);
                sbio->status = 0;
@@@ -2402,8 -2402,7 +2402,7 @@@ static void scrub_missing_raid56_worker
  
        scrub_block_put(sblock);
  
-       if (sctx->is_dev_replace &&
-           atomic_read(&sctx->flush_all_writes)) {
+       if (sctx->is_dev_replace && sctx->flush_all_writes) {
                mutex_lock(&sctx->wr_lock);
                scrub_wr_submit(sctx);
                mutex_unlock(&sctx->wr_lock);
@@@ -2607,8 -2606,7 +2606,7 @@@ static void scrub_bio_end_io_worker(str
        sctx->first_free = sbio->index;
        spin_unlock(&sctx->list_lock);
  
-       if (sctx->is_dev_replace &&
-           atomic_read(&sctx->flush_all_writes)) {
+       if (sctx->is_dev_replace && sctx->flush_all_writes) {
                mutex_lock(&sctx->wr_lock);
                scrub_wr_submit(sctx);
                mutex_unlock(&sctx->wr_lock);
@@@ -2622,7 -2620,8 +2620,8 @@@ static inline void __scrub_mark_bitmap(
                                       u64 start, u64 len)
  {
        u64 offset;
-       int nsectors;
+       u64 nsectors64;
+       u32 nsectors;
        int sectorsize = sparity->sctx->fs_info->sectorsize;
  
        if (len >= sparity->stripe_len) {
        start -= sparity->logic_start;
        start = div64_u64_rem(start, sparity->stripe_len, &offset);
        offset = div_u64(offset, sectorsize);
-       nsectors = (int)len / sectorsize;
+       nsectors64 = div_u64(len, sectorsize);
+       ASSERT(nsectors64 < UINT_MAX);
+       nsectors = (u32)nsectors64;
  
        if (offset + nsectors <= sparity->nsectors) {
                bitmap_set(bitmap, offset, nsectors);
@@@ -2706,7 -2708,9 +2708,9 @@@ static int scrub_find_csum(struct scrub
        if (!sum)
                return 0;
  
-       index = ((u32)(logical - sum->bytenr)) / sctx->fs_info->sectorsize;
+       index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
+       ASSERT(index < UINT_MAX);
        num_sectors = sum->len / sctx->fs_info->sectorsize;
        memcpy(csum, sum->sums + index, sctx->csum_size);
        if (index == num_sectors - 1) {
@@@ -3440,14 -3444,14 +3444,14 @@@ static noinline_for_stack int scrub_str
                 */
                if (atomic_read(&fs_info->scrub_pause_req)) {
                        /* push queued extents */
-                       atomic_set(&sctx->flush_all_writes, 1);
+                       sctx->flush_all_writes = true;
                        scrub_submit(sctx);
                        mutex_lock(&sctx->wr_lock);
                        scrub_wr_submit(sctx);
                        mutex_unlock(&sctx->wr_lock);
                        wait_event(sctx->list_wait,
                                   atomic_read(&sctx->bios_in_flight) == 0);
-                       atomic_set(&sctx->flush_all_writes, 0);
+                       sctx->flush_all_writes = false;
                        scrub_blocked_if_needed(fs_info);
                }
  
@@@ -3869,8 -3873,7 +3873,7 @@@ int scrub_enumerate_chunks(struct scrub
                        ro_set = 0;
                } else {
                        btrfs_warn(fs_info,
-                                  "failed setting block group ro, ret=%d\n",
-                                  ret);
+                                  "failed setting block group ro: %d", ret);
                        btrfs_put_block_group(cache);
                        break;
                }
                 * write requests are really completed when bios_in_flight
                 * changes to 0.
                 */
-               atomic_set(&sctx->flush_all_writes, 1);
+               sctx->flush_all_writes = true;
                scrub_submit(sctx);
                mutex_lock(&sctx->wr_lock);
                scrub_wr_submit(sctx);
                 */
                wait_event(sctx->list_wait,
                           atomic_read(&sctx->workers_pending) == 0);
-               atomic_set(&sctx->flush_all_writes, 0);
+               sctx->flush_all_writes = false;
  
                scrub_pause_off(fs_info);
  
@@@ -4012,14 -4015,8 +4015,8 @@@ static noinline_for_stack int scrub_wor
        int max_active = fs_info->thread_pool_size;
  
        if (fs_info->scrub_workers_refcnt == 0) {
-               if (is_dev_replace)
-                       fs_info->scrub_workers =
-                               btrfs_alloc_workqueue(fs_info, "scrub", flags,
-                                                     1, 4);
-               else
-                       fs_info->scrub_workers =
-                               btrfs_alloc_workqueue(fs_info, "scrub", flags,
-                                                     max_active, 4);
+               fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub",
+                               flags, is_dev_replace ? 1 : max_active, 4);
                if (!fs_info->scrub_workers)
                        goto fail_scrub_workers;
  
@@@ -4627,7 -4624,7 +4624,7 @@@ static int write_page_nocow(struct scru
        bio = btrfs_io_bio_alloc(1);
        bio->bi_iter.bi_size = 0;
        bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
 -      bio->bi_bdev = dev->bdev;
 +      bio_set_dev(bio, dev->bdev);
        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
        ret = bio_add_page(bio, page, PAGE_SIZE, 0);
        if (ret != PAGE_SIZE) {
diff --combined fs/btrfs/volumes.c
index 002aa318da67bb560979430ae6eee0f6ad263417,d024f1b072823a8a02be97daeb02ba01e53ef5a4..c188256a367c99ebdcff970ce13147e4365a2cbd
@@@ -152,7 -152,15 +152,15 @@@ struct list_head *btrfs_get_fs_uuids(vo
        return &fs_uuids;
  }
  
- static struct btrfs_fs_devices *__alloc_fs_devices(void)
+ /*
+  * alloc_fs_devices - allocate struct btrfs_fs_devices
+  * @fsid:     if not NULL, copy the uuid to fs_devices::fsid
+  *
+  * Return a pointer to a new struct btrfs_fs_devices on success, or ERR_PTR().
+  * The returned struct is not linked onto any lists and can be destroyed with
+  * kfree() right away.
+  */
+ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
  {
        struct btrfs_fs_devices *fs_devs;
  
        INIT_LIST_HEAD(&fs_devs->resized_devices);
        INIT_LIST_HEAD(&fs_devs->alloc_list);
        INIT_LIST_HEAD(&fs_devs->list);
-       return fs_devs;
- }
- /**
-  * alloc_fs_devices - allocate struct btrfs_fs_devices
-  * @fsid:     a pointer to UUID for this FS.  If NULL a new UUID is
-  *            generated.
-  *
-  * Return: a pointer to a new &struct btrfs_fs_devices on success;
-  * ERR_PTR() on error.  Returned struct is not linked onto any lists and
-  * can be destroyed with kfree() right away.
-  */
- static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
- {
-       struct btrfs_fs_devices *fs_devs;
-       fs_devs = __alloc_fs_devices();
-       if (IS_ERR(fs_devs))
-               return fs_devs;
        if (fsid)
                memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
-       else
-               generate_random_uuid(fs_devs->fsid);
  
        return fs_devs;
  }
@@@ -269,9 -254,17 +254,17 @@@ static struct btrfs_device *__alloc_dev
        return dev;
  }
  
- static noinline struct btrfs_device *__find_device(struct list_head *head,
-                                                  u64 devid, u8 *uuid)
+ /*
+  * Find a device specified by @devid or @uuid in the list of @fs_devices, or
+  * return NULL.
+  *
+  * If devid and uuid are both specified, the match must be exact, otherwise
+  * only devid is used.
+  */
+ static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
+               u64 devid, const u8 *uuid)
  {
+       struct list_head *head = &fs_devices->devices;
        struct btrfs_device *dev;
  
        list_for_each_entry(dev, head, dev_list) {
@@@ -310,7 -303,7 +303,7 @@@ btrfs_get_bdev_and_sb(const char *devic
  
        if (flush)
                filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
-       ret = set_blocksize(*bdev, 4096);
+       ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE);
        if (ret) {
                blkdev_put(*bdev, flags);
                goto error;
@@@ -636,8 -629,8 +629,8 @@@ static noinline int device_list_add(con
  
                device = NULL;
        } else {
-               device = __find_device(&fs_devices->devices, devid,
-                                      disk_super->dev_item.uuid);
+               device = find_device(fs_devices, devid,
+                               disk_super->dev_item.uuid);
        }
  
        if (!device) {
@@@ -1578,7 -1571,6 +1571,6 @@@ out
  
  static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
                                  struct btrfs_device *device,
-                                 u64 chunk_tree, u64 chunk_objectid,
                                  u64 chunk_offset, u64 start, u64 num_bytes)
  {
        int ret;
        leaf = path->nodes[0];
        extent = btrfs_item_ptr(leaf, path->slots[0],
                                struct btrfs_dev_extent);
-       btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
-       btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
+       btrfs_set_dev_extent_chunk_tree(leaf, extent,
+                                       BTRFS_CHUNK_TREE_OBJECTID);
+       btrfs_set_dev_extent_chunk_objectid(leaf, extent,
+                                           BTRFS_FIRST_CHUNK_TREE_OBJECTID);
        btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
  
-       write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
        btrfs_set_dev_extent_length(leaf, extent, num_bytes);
        btrfs_mark_buffer_dirty(leaf);
  out:
@@@ -1726,7 -1718,7 +1718,7 @@@ static int btrfs_add_device(struct btrf
        ptr = btrfs_device_uuid(dev_item);
        write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
        ptr = btrfs_device_fsid(dev_item);
-       write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_UUID_SIZE);
+       write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE);
        btrfs_mark_buffer_dirty(leaf);
  
        ret = 0;
@@@ -1872,7 -1864,6 +1864,6 @@@ int btrfs_rm_device(struct btrfs_fs_inf
        struct btrfs_fs_devices *cur_devices;
        u64 num_devices;
        int ret = 0;
-       bool clear_super = false;
  
        mutex_lock(&uuid_mutex);
  
                list_del_init(&device->dev_alloc_list);
                device->fs_devices->rw_devices--;
                mutex_unlock(&fs_info->chunk_mutex);
-               clear_super = true;
        }
  
        mutex_unlock(&uuid_mutex);
                free_fs_devices(cur_devices);
        }
  
-       fs_info->num_tolerated_disk_barrier_failures =
-               btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
  out:
        mutex_unlock(&uuid_mutex);
        return ret;
@@@ -2202,7 -2189,7 +2189,7 @@@ static int btrfs_prepare_sprout(struct 
        if (!fs_devices->seeding)
                return -EINVAL;
  
-       seed_devices = __alloc_fs_devices();
+       seed_devices = alloc_fs_devices(NULL);
        if (IS_ERR(seed_devices))
                return PTR_ERR(seed_devices);
  
@@@ -2261,7 -2248,7 +2248,7 @@@ static int btrfs_finish_sprout(struct b
        struct btrfs_dev_item *dev_item;
        struct btrfs_device *device;
        struct btrfs_key key;
-       u8 fs_uuid[BTRFS_UUID_SIZE];
+       u8 fs_uuid[BTRFS_FSID_SIZE];
        u8 dev_uuid[BTRFS_UUID_SIZE];
        u64 devid;
        int ret;
@@@ -2304,7 -2291,7 +2291,7 @@@ next_slot
                read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
                                   BTRFS_UUID_SIZE);
                read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
-                                  BTRFS_UUID_SIZE);
+                                  BTRFS_FSID_SIZE);
                device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
                BUG_ON(!device); /* Logic error */
  
@@@ -2407,7 -2394,7 +2394,7 @@@ int btrfs_init_new_device(struct btrfs_
        device->is_tgtdev_for_dev_replace = 0;
        device->mode = FMODE_EXCL;
        device->dev_stats_valid = 1;
-       set_blocksize(device->bdev, 4096);
+       set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
  
        if (seeding_dev) {
                sb->s_flags &= ~MS_RDONLY;
                                   "sysfs: failed to create fsid for sprout");
        }
  
-       fs_info->num_tolerated_disk_barrier_failures =
-               btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
        ret = btrfs_commit_transaction(trans);
  
        if (seeding_dev) {
@@@ -2612,7 -2597,7 +2597,7 @@@ int btrfs_init_dev_replace_tgtdev(struc
        device->is_tgtdev_for_dev_replace = 1;
        device->mode = FMODE_EXCL;
        device->dev_stats_valid = 1;
-       set_blocksize(device->bdev, 4096);
+       set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
        device->fs_devices = fs_info->fs_devices;
        list_add(&device->dev_list, &fs_info->fs_devices->devices);
        fs_info->fs_devices->num_devices++;
@@@ -2728,8 -2713,7 +2713,7 @@@ int btrfs_grow_device(struct btrfs_tran
  }
  
  static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
-                           struct btrfs_fs_info *fs_info, u64 chunk_objectid,
-                           u64 chunk_offset)
+                           struct btrfs_fs_info *fs_info, u64 chunk_offset)
  {
        struct btrfs_root *root = fs_info->chunk_root;
        int ret;
        if (!path)
                return -ENOMEM;
  
-       key.objectid = chunk_objectid;
+       key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
        key.offset = chunk_offset;
        key.type = BTRFS_CHUNK_ITEM_KEY;
  
@@@ -2763,8 -2747,7 +2747,7 @@@ out
        return ret;
  }
  
- static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info,
-                              u64 chunk_objectid, u64 chunk_offset)
+ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
  {
        struct btrfs_super_block *super_copy = fs_info->super_copy;
        struct btrfs_disk_key *disk_key;
                        ret = -EIO;
                        break;
                }
-               if (key.objectid == chunk_objectid &&
+               if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID &&
                    key.offset == chunk_offset) {
                        memmove(ptr, ptr + len, array_size - (cur + len));
                        array_size -= len;
@@@ -2846,7 -2829,6 +2829,6 @@@ int btrfs_remove_chunk(struct btrfs_tra
        struct extent_map *em;
        struct map_lookup *map;
        u64 dev_extent_len = 0;
-       u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
        int i, ret = 0;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
  
        }
        mutex_unlock(&fs_devices->device_list_mutex);
  
-       ret = btrfs_free_chunk(trans, fs_info, chunk_objectid, chunk_offset);
+       ret = btrfs_free_chunk(trans, fs_info, chunk_offset);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                goto out;
        trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len);
  
        if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
-               ret = btrfs_del_sys_chunk(fs_info, chunk_objectid,
-                                         chunk_offset);
+               ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
                        goto out;
@@@ -3312,7 -3293,6 +3293,6 @@@ static int chunk_devid_filter(struct ex
  /* [pstart, pend) */
  static int chunk_drange_filter(struct extent_buffer *leaf,
                               struct btrfs_chunk *chunk,
-                              u64 chunk_offset,
                               struct btrfs_balance_args *bargs)
  {
        struct btrfs_stripe *stripe;
@@@ -3439,7 -3419,7 +3419,7 @@@ static int should_balance_chunk(struct 
  
        /* drange filter, makes sense only with devid filter */
        if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
-           chunk_drange_filter(leaf, chunk, chunk_offset, bargs)) {
+           chunk_drange_filter(leaf, chunk, bargs)) {
                return 0;
        }
  
@@@ -3898,13 -3878,6 +3878,6 @@@ int btrfs_balance(struct btrfs_balance_
                           meta_target, data_target);
        }
  
-       if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
-               fs_info->num_tolerated_disk_barrier_failures = min(
-                       btrfs_calc_num_tolerated_disk_barrier_failures(fs_info),
-                       btrfs_get_num_tolerated_disk_barrier_failures(
-                               bctl->sys.target));
-       }
        ret = insert_balance_item(fs_info, bctl);
        if (ret && ret != -EEXIST)
                goto out;
        mutex_lock(&fs_info->balance_mutex);
        atomic_dec(&fs_info->balance_running);
  
-       if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
-               fs_info->num_tolerated_disk_barrier_failures =
-                       btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
-       }
        if (bargs) {
                memset(bargs, 0, sizeof(*bargs));
                update_ioctl_balance_args(fs_info, 0, bargs);
@@@ -4127,7 -4095,6 +4095,6 @@@ static int btrfs_uuid_scan_kthread(voi
        struct btrfs_fs_info *fs_info = data;
        struct btrfs_root *root = fs_info->tree_root;
        struct btrfs_key key;
-       struct btrfs_key max_key;
        struct btrfs_path *path = NULL;
        int ret = 0;
        struct extent_buffer *eb;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = 0;
  
-       max_key.objectid = (u64)-1;
-       max_key.type = BTRFS_ROOT_ITEM_KEY;
-       max_key.offset = (u64)-1;
        while (1) {
                ret = btrfs_search_forward(root, &key, path, 0);
                if (ret) {
@@@ -4601,12 -4564,6 +4564,6 @@@ static int btrfs_cmp_device_info(const 
        return 0;
  }
  
- static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
- {
-       /* TODO allow them to set a preferred stripe size */
-       return SZ_64K;
- }
  static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
  {
        if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
@@@ -4629,7 -4586,7 +4586,7 @@@ static int __btrfs_alloc_chunk(struct b
  {
        struct btrfs_fs_info *info = trans->fs_info;
        struct btrfs_fs_devices *fs_devices = info->fs_devices;
-       struct list_head *cur;
+       struct btrfs_device *device;
        struct map_lookup *map = NULL;
        struct extent_map_tree *em_tree;
        struct extent_map *em;
        u64 max_chunk_size;
        u64 stripe_size;
        u64 num_bytes;
-       u64 raid_stripe_len = BTRFS_STRIPE_LEN;
        int ndevs;
        int i;
        int j;
        if (!devices_info)
                return -ENOMEM;
  
-       cur = fs_devices->alloc_list.next;
        /*
         * in the first pass through the devices list, we gather information
         * about the available holes on each device.
         */
        ndevs = 0;
-       while (cur != &fs_devices->alloc_list) {
-               struct btrfs_device *device;
+       list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
                u64 max_avail;
                u64 dev_offset;
  
-               device = list_entry(cur, struct btrfs_device, dev_alloc_list);
-               cur = cur->next;
                if (!device->writeable) {
                        WARN(1, KERN_ERR
                               "BTRFS: read-only device in alloc_list\n");
             btrfs_cmp_device_info, NULL);
  
        /* round down to number of usable stripes */
-       ndevs -= ndevs % devs_increment;
+       ndevs = round_down(ndevs, devs_increment);
  
        if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
                ret = -ENOSPC;
                goto error;
        }
  
-       if (devs_max && ndevs > devs_max)
-               ndevs = devs_max;
+       ndevs = min(ndevs, devs_max);
        /*
         * the primary goal is to maximize the number of stripes, so use as many
         * devices as possible, even if the stripes are not maximum sized.
         */
        data_stripes = num_stripes / ncopies;
  
-       if (type & BTRFS_BLOCK_GROUP_RAID5) {
-               raid_stripe_len = find_raid56_stripe_len(ndevs - 1,
-                                                        info->stripesize);
+       if (type & BTRFS_BLOCK_GROUP_RAID5)
                data_stripes = num_stripes - 1;
-       }
-       if (type & BTRFS_BLOCK_GROUP_RAID6) {
-               raid_stripe_len = find_raid56_stripe_len(ndevs - 2,
-                                                        info->stripesize);
+       if (type & BTRFS_BLOCK_GROUP_RAID6)
                data_stripes = num_stripes - 2;
-       }
  
        /*
         * Use the number of data stripes to figure out how big this chunk
        stripe_size = div_u64(stripe_size, dev_stripes);
  
        /* align to BTRFS_STRIPE_LEN */
-       stripe_size = div64_u64(stripe_size, raid_stripe_len);
-       stripe_size *= raid_stripe_len;
+       stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN);
  
        map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
        if (!map) {
                                                   j * stripe_size;
                }
        }
-       map->sector_size = info->sectorsize;
-       map->stripe_len = raid_stripe_len;
-       map->io_align = raid_stripe_len;
-       map->io_width = raid_stripe_len;
+       map->stripe_len = BTRFS_STRIPE_LEN;
+       map->io_align = BTRFS_STRIPE_LEN;
+       map->io_width = BTRFS_STRIPE_LEN;
        map->type = type;
        map->sub_stripes = sub_stripes;
  
                goto error;
        }
  
-       ret = btrfs_make_block_group(trans, info, 0, type,
-                                    BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                                    start, num_bytes);
+       ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes);
        if (ret)
                goto error_del_extent;
  
@@@ -4963,11 -4903,8 +4903,8 @@@ int btrfs_finish_chunk_alloc(struct btr
                ret = btrfs_update_device(trans, device);
                if (ret)
                        break;
-               ret = btrfs_alloc_dev_extent(trans, device,
-                                            chunk_root->root_key.objectid,
-                                            BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                                            chunk_offset, dev_offset,
-                                            stripe_size);
+               ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
+                                            dev_offset, stripe_size);
                if (ret)
                        break;
        }
@@@ -5172,7 -5109,6 +5109,6 @@@ int btrfs_num_copies(struct btrfs_fs_in
  }
  
  unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
-                                   struct btrfs_mapping_tree *map_tree,
                                    u64 logical)
  {
        struct extent_map *em;
        unsigned long len = fs_info->sectorsize;
  
        em = get_chunk_map(fs_info, logical, len);
-       WARN_ON(IS_ERR(em));
  
-       map = em->map_lookup;
-       if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
-               len = map->stripe_len * nr_data_stripes(map);
-       free_extent_map(em);
+       if (!WARN_ON(IS_ERR(em))) {
+               map = em->map_lookup;
+               if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
+                       len = map->stripe_len * nr_data_stripes(map);
+               free_extent_map(em);
+       }
        return len;
  }
  
- int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
-                          u64 logical, u64 len, int mirror_num)
+ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
  {
        struct extent_map *em;
        struct map_lookup *map;
        int ret = 0;
  
        em = get_chunk_map(fs_info, logical, len);
-       WARN_ON(IS_ERR(em));
  
-       map = em->map_lookup;
-       if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
-               ret = 1;
-       free_extent_map(em);
+       if(!WARN_ON(IS_ERR(em))) {
+               map = em->map_lookup;
+               if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
+                       ret = 1;
+               free_extent_map(em);
+       }
        return ret;
  }
  
@@@ -6188,7 -6125,7 +6125,7 @@@ static void submit_stripe_bio(struct bt
                rcu_read_unlock();
        }
  #endif
 -      bio->bi_bdev = dev->bdev;
 +      bio_set_dev(bio, dev->bdev);
  
        btrfs_bio_counter_inc_noblocked(fs_info);
  
@@@ -6212,8 -6149,8 +6149,8 @@@ static void bbio_error(struct btrfs_bi
        }
  }
  
 -int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 -                int mirror_num, int async_submit)
 +blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 +                         int mirror_num, int async_submit)
  {
        struct btrfs_device *dev;
        struct bio *first_bio = bio;
                                &map_length, &bbio, mirror_num, 1);
        if (ret) {
                btrfs_bio_counter_dec(fs_info);
 -              return ret;
 +              return errno_to_blk_status(ret);
        }
  
        total_devs = bbio->num_stripes;
                }
  
                btrfs_bio_counter_dec(fs_info);
 -              return ret;
 +              return errno_to_blk_status(ret);
        }
  
        if (map_length < length) {
                                  dev_nr, async_submit);
        }
        btrfs_bio_counter_dec(fs_info);
 -      return 0;
 +      return BLK_STS_OK;
  }
  
  struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
        cur_devices = fs_info->fs_devices;
        while (cur_devices) {
                if (!fsid ||
-                   !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
-                       device = __find_device(&cur_devices->devices,
-                                              devid, uuid);
+                   !memcmp(cur_devices->fsid, fsid, BTRFS_FSID_SIZE)) {
+                       device = find_device(cur_devices, devid, uuid);
                        if (device)
                                return device;
                }
@@@ -6450,7 -6386,6 +6386,6 @@@ static int read_one_chunk(struct btrfs_
        struct extent_map *em;
        u64 logical;
        u64 length;
-       u64 stripe_len;
        u64 devid;
        u8 uuid[BTRFS_UUID_SIZE];
        int num_stripes;
  
        logical = key->offset;
        length = btrfs_chunk_length(leaf, chunk);
-       stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
        num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
  
        ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical);
        map->num_stripes = num_stripes;
        map->io_width = btrfs_chunk_io_width(leaf, chunk);
        map->io_align = btrfs_chunk_io_align(leaf, chunk);
-       map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
        map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
        map->type = btrfs_chunk_type(leaf, chunk);
        map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
                if (!map->stripes[i].dev &&
                    !btrfs_test_opt(fs_info, DEGRADED)) {
                        free_extent_map(em);
+                       btrfs_report_missing_device(fs_info, devid, uuid);
                        return -EIO;
                }
                if (!map->stripes[i].dev) {
                                free_extent_map(em);
                                return -EIO;
                        }
-                       btrfs_warn(fs_info, "devid %llu uuid %pU is missing",
-                                  devid, uuid);
+                       btrfs_report_missing_device(fs_info, devid, uuid);
                }
                map->stripes[i].dev->in_fs_metadata = 1;
        }
@@@ -6569,10 -6502,11 +6502,11 @@@ static struct btrfs_fs_devices *open_se
        int ret;
  
        BUG_ON(!mutex_is_locked(&uuid_mutex));
+       ASSERT(fsid);
  
        fs_devices = fs_info->fs_devices->seed;
        while (fs_devices) {
-               if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE))
+               if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE))
                        return fs_devices;
  
                fs_devices = fs_devices->seed;
@@@ -6625,16 -6559,16 +6559,16 @@@ static int read_one_dev(struct btrfs_fs
        struct btrfs_device *device;
        u64 devid;
        int ret;
-       u8 fs_uuid[BTRFS_UUID_SIZE];
+       u8 fs_uuid[BTRFS_FSID_SIZE];
        u8 dev_uuid[BTRFS_UUID_SIZE];
  
        devid = btrfs_device_id(leaf, dev_item);
        read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
                           BTRFS_UUID_SIZE);
        read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
-                          BTRFS_UUID_SIZE);
+                          BTRFS_FSID_SIZE);
  
-       if (memcmp(fs_uuid, fs_info->fsid, BTRFS_UUID_SIZE)) {
+       if (memcmp(fs_uuid, fs_info->fsid, BTRFS_FSID_SIZE)) {
                fs_devices = open_seed_devices(fs_info, fs_uuid);
                if (IS_ERR(fs_devices))
                        return PTR_ERR(fs_devices);
  
        device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
        if (!device) {
-               if (!btrfs_test_opt(fs_info, DEGRADED))
+               if (!btrfs_test_opt(fs_info, DEGRADED)) {
+                       btrfs_report_missing_device(fs_info, devid, dev_uuid);
                        return -EIO;
+               }
  
                device = add_missing_dev(fs_devices, devid, dev_uuid);
                if (!device)
                        return -ENOMEM;
-               btrfs_warn(fs_info, "devid %llu uuid %pU missing",
-                               devid, dev_uuid);
+               btrfs_report_missing_device(fs_info, devid, dev_uuid);
        } else {
-               if (!device->bdev && !btrfs_test_opt(fs_info, DEGRADED))
-                       return -EIO;
+               if (!device->bdev) {
+                       btrfs_report_missing_device(fs_info, devid, dev_uuid);
+                       if (!btrfs_test_opt(fs_info, DEGRADED))
+                               return -EIO;
+               }
  
                if(!device->bdev && !device->missing) {
                        /*
@@@ -6818,6 -6756,70 +6756,70 @@@ out_short_read
        return -EIO;
  }
  
+ void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid,
+                                u8 *uuid)
+ {
+       btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing", devid, uuid);
+ }
+ 
+ /*
+  * Check if all chunks in the fs are OK for read-write degraded mount
+  *
+  * Return true if all chunks meet the minimal RW mount requirements.
+  * Return false if any chunk doesn't meet the minimal RW mount requirements.
+  */
+ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info)
+ {
+       struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+       struct extent_map *em;
+       u64 next_start = 0;
+       bool ret = true;
+ 
+       read_lock(&map_tree->map_tree.lock);
+       em = lookup_extent_mapping(&map_tree->map_tree, 0, (u64)-1);
+       read_unlock(&map_tree->map_tree.lock);
+       /* No chunk at all? Return false anyway */
+       if (!em) {
+               ret = false;
+               goto out;
+       }
+       while (em) {
+               struct map_lookup *map;
+               int missing = 0;
+               int max_tolerated;
+               int i;
+ 
+               map = em->map_lookup;
+               max_tolerated =
+                       btrfs_get_num_tolerated_disk_barrier_failures(
+                                       map->type);
+               for (i = 0; i < map->num_stripes; i++) {
+                       struct btrfs_device *dev = map->stripes[i].dev;
+                       if (!dev || !dev->bdev || dev->missing ||
+                           dev->last_flush_error)
+                               missing++;
+               }
+               if (missing > max_tolerated) {
+                       btrfs_warn(fs_info,
+       "chunk %llu missing %d devices, max tolerance is %d for writeable mount",
+                                  em->start, missing, max_tolerated);
+                       free_extent_map(em);
+                       ret = false;
+                       goto out;
+               }
+               next_start = extent_map_end(em);
+               free_extent_map(em);
+               read_lock(&map_tree->map_tree.lock);
+               em = lookup_extent_mapping(&map_tree->map_tree, next_start,
+                                          (u64)(-1) - next_start);
+               read_unlock(&map_tree->map_tree.lock);
+       }
+ out:
+       return ret;
+ }
+ 
  int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
  {
        struct btrfs_root *root = fs_info->chunk_root;
diff --combined fs/btrfs/volumes.h
index 93277fc60930561a7ffb8c40d2e4ae61ce5908a0,eebe2f871dc71d0cb41df044c1896041aa0372bb..6108fdfec67fb4340042030435ac959a77b03997
@@@ -74,7 -74,7 +74,7 @@@ struct btrfs_device 
        int missing;
        int can_discard;
        int is_tgtdev_for_dev_replace;
 -      int last_flush_error;
 +      blk_status_t last_flush_error;
        int flush_bio_sent;
  
  #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
@@@ -353,7 -353,6 +353,6 @@@ struct map_lookup 
        int io_align;
        int io_width;
        u64 stripe_len;
-       int sector_size;
        int num_stripes;
        int sub_stripes;
        struct btrfs_bio_stripe stripes[];
@@@ -416,8 -415,8 +415,8 @@@ int btrfs_alloc_chunk(struct btrfs_tran
                      struct btrfs_fs_info *fs_info, u64 type);
  void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
  void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
 -int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 -                int mirror_num, int async_submit);
 +blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 +                         int mirror_num, int async_submit);
  int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                       fmode_t flags, void *holder);
  int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
@@@ -481,9 -480,8 +480,8 @@@ void btrfs_init_dev_replace_tgtdev_for_
                                              struct btrfs_device *tgtdev);
  void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
  int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
-                          u64 logical, u64 len, int mirror_num);
+                          u64 logical, u64 len);
  unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
-                                   struct btrfs_mapping_tree *map_tree,
                                    u64 logical);
  int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
                                struct btrfs_fs_info *fs_info,
@@@ -543,4 -541,8 +541,8 @@@ struct list_head *btrfs_get_fs_uuids(vo
  void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
  void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
  
+ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info);
+ void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid,
+                                u8 *uuid);
  #endif