btrfs: scrub: remove the old scrub recheck code
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index caf025f572cc5f382034d5ab4d3983ee54125be7..e311bb8e647b7cbf077e9d631a144b452c77906f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -183,12 +183,6 @@ struct scrub_stripe {
        struct work_struct work;
 };
 
-struct scrub_recover {
-       refcount_t              refs;
-       struct btrfs_io_context *bioc;
-       u64                     map_length;
-};
-
 struct scrub_sector {
        struct scrub_block      *sblock;
        struct list_head        list;
@@ -200,8 +194,6 @@ struct scrub_sector {
        unsigned int            have_csum:1;
        unsigned int            io_error:1;
        u8                      csum[BTRFS_CSUM_SIZE];
-
-       struct scrub_recover    *recover;
 };
 
 struct scrub_bio {
@@ -303,13 +295,6 @@ struct scrub_warning {
        struct btrfs_device     *dev;
 };
 
-struct full_stripe_lock {
-       struct rb_node node;
-       u64 logical;
-       u64 refs;
-       struct mutex mutex;
-};
-
 #ifndef CONFIG_64BIT
 /* This structure is for architectures whose (void *) is smaller than u64 */
 struct scrub_page_private {
@@ -406,11 +391,11 @@ static void wait_scrub_stripe_io(struct scrub_stripe *stripe)
        wait_event(stripe->io_wait, atomic_read(&stripe->pending_io) == 0);
 }
 
-static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx,
-                                            struct btrfs_device *dev,
-                                            u64 logical, u64 physical,
-                                            u64 physical_for_dev_replace,
-                                            int mirror_num)
+struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx,
+                                     struct btrfs_device *dev,
+                                     u64 logical, u64 physical,
+                                     u64 physical_for_dev_replace,
+                                     int mirror_num)
 {
        struct scrub_block *sblock;
 
@@ -437,8 +422,7 @@ static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx,
  *
  * Will also allocate new pages for @sblock if needed.
  */
-static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock,
-                                              u64 logical)
+struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock, u64 logical)
 {
        const pgoff_t page_index = (logical - sblock->logical) >> PAGE_SHIFT;
        struct scrub_sector *ssector;
@@ -534,17 +518,6 @@ static int bio_add_scrub_sector(struct bio *bio, struct scrub_sector *ssector,
                            scrub_sector_get_page_offset(ssector));
 }
 
-static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
-                                    struct scrub_block *sblocks_for_recheck[]);
-static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
-                               struct scrub_block *sblock,
-                               int retry_failed_mirror);
-static void scrub_recheck_block_checksum(struct scrub_block *sblock);
-static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
-                                            struct scrub_block *sblock_good);
-static int scrub_repair_sector_from_good_copy(struct scrub_block *sblock_bad,
-                                           struct scrub_block *sblock_good,
-                                           int sector_num, int force_write);
 static int scrub_checksum_data(struct scrub_block *sblock);
 static int scrub_checksum_tree_block(struct scrub_block *sblock);
 static int scrub_checksum_super(struct scrub_block *sblock);
@@ -555,12 +528,6 @@ static void scrub_bio_end_io_worker(struct work_struct *work);
 static void scrub_block_complete(struct scrub_block *sblock);
 static void scrub_put_ctx(struct scrub_ctx *sctx);
 
-static inline int scrub_is_page_on_raid56(struct scrub_sector *sector)
-{
-       return sector->recover &&
-              (sector->recover->bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
-}
-
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
 {
        refcount_inc(&sctx->refs);
@@ -606,223 +573,6 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
        scrub_pause_off(fs_info);
 }
 
-/*
- * Insert new full stripe lock into full stripe locks tree
- *
- * Return a pointer to the existing or newly inserted full_stripe_lock
- * structure on success.
- * Return ERR_PTR(-ENOMEM) if memory allocation failed.
- *
- * NOTE: caller must hold full_stripe_locks_root->lock before calling this
- * function
- */
-static struct full_stripe_lock *insert_full_stripe_lock(
-               struct btrfs_full_stripe_locks_tree *locks_root,
-               u64 fstripe_logical)
-{
-       struct rb_node **p;
-       struct rb_node *parent = NULL;
-       struct full_stripe_lock *entry;
-       struct full_stripe_lock *ret;
-
-       lockdep_assert_held(&locks_root->lock);
-
-       p = &locks_root->root.rb_node;
-       while (*p) {
-               parent = *p;
-               entry = rb_entry(parent, struct full_stripe_lock, node);
-               if (fstripe_logical < entry->logical) {
-                       p = &(*p)->rb_left;
-               } else if (fstripe_logical > entry->logical) {
-                       p = &(*p)->rb_right;
-               } else {
-                       entry->refs++;
-                       return entry;
-               }
-       }
-
-       /*
-        * Insert new lock.
-        */
-       ret = kmalloc(sizeof(*ret), GFP_KERNEL);
-       if (!ret)
-               return ERR_PTR(-ENOMEM);
-       ret->logical = fstripe_logical;
-       ret->refs = 1;
-       mutex_init(&ret->mutex);
-
-       rb_link_node(&ret->node, parent, p);
-       rb_insert_color(&ret->node, &locks_root->root);
-       return ret;
-}
-
-/*
- * Search for a full stripe lock of a block group
- *
- * Return pointer to existing full stripe lock if found
- * Return NULL if not found
- */
-static struct full_stripe_lock *search_full_stripe_lock(
-               struct btrfs_full_stripe_locks_tree *locks_root,
-               u64 fstripe_logical)
-{
-       struct rb_node *node;
-       struct full_stripe_lock *entry;
-
-       lockdep_assert_held(&locks_root->lock);
-
-       node = locks_root->root.rb_node;
-       while (node) {
-               entry = rb_entry(node, struct full_stripe_lock, node);
-               if (fstripe_logical < entry->logical)
-                       node = node->rb_left;
-               else if (fstripe_logical > entry->logical)
-                       node = node->rb_right;
-               else
-                       return entry;
-       }
-       return NULL;
-}
-
-/*
- * Helper to get full stripe logical from a normal bytenr.
- *
- * Caller must ensure @cache is a RAID56 block group.
- */
-static u64 get_full_stripe_logical(struct btrfs_block_group *cache, u64 bytenr)
-{
-       u64 ret;
-
-       /*
-        * Due to the chunk item size limit, the full stripe length should
-        * not be larger than U32_MAX. This is just a sanity check.
-        */
-       WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);
-
-       /*
-        * round_down() can only handle powers of 2, while a RAID56 full
-        * stripe length can be 64KiB * n, so we need to round down manually.
-        */
-       ret = div64_u64(bytenr - cache->start, cache->full_stripe_len) *
-                       cache->full_stripe_len + cache->start;
-       return ret;
-}
-
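Since round_down() only handles power-of-two alignments, the helper above does the rounding with a division and a multiplication. A standalone sketch with hypothetical numbers (a 192 KiB full stripe, i.e. three 64 KiB data stripes, in a block group starting at 1 GiB):

/* Userspace sketch of the manual round-down; all values are hypothetical. */
#include <stdint.h>
#include <stdio.h>

static uint64_t full_stripe_logical(uint64_t bg_start, uint64_t stripe_len,
				    uint64_t bytenr)
{
	/* Same arithmetic as the kernel code, minus div64_u64(). */
	return (bytenr - bg_start) / stripe_len * stripe_len + bg_start;
}

int main(void)
{
	uint64_t gib = 1024ULL * 1024 * 1024;
	uint64_t len = 3 * 64 * 1024;	/* 192 KiB, not a power of two */

	/*
	 * 500 KiB into the block group falls into the third full stripe,
	 * which starts 384 KiB in, so this prints 393216.
	 */
	printf("%llu\n", (unsigned long long)
	       (full_stripe_logical(gib, len, gib + 500 * 1024) - gib));
	return 0;
}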
-/*
- * Lock a full stripe to prevent recovery and reads from running concurrently
- *
- * It's only used for profiles with parity (RAID5/6); for other profiles it
- * does nothing.
- *
- * Return 0 if we locked the full stripe covering @bytenr, with a mutex held.
- * The caller must then call unlock_full_stripe() in the same context.
- *
- * Return <0 on error.
- */
-static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
-                           bool *locked_ret)
-{
-       struct btrfs_block_group *bg_cache;
-       struct btrfs_full_stripe_locks_tree *locks_root;
-       struct full_stripe_lock *existing;
-       u64 fstripe_start;
-       int ret = 0;
-
-       *locked_ret = false;
-       bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
-       if (!bg_cache) {
-               ASSERT(0);
-               return -ENOENT;
-       }
-
-       /* Profiles not based on parity don't need full stripe lock */
-       if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
-               goto out;
-       locks_root = &bg_cache->full_stripe_locks_root;
-
-       fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
-
-       /* Now insert the full stripe lock */
-       mutex_lock(&locks_root->lock);
-       existing = insert_full_stripe_lock(locks_root, fstripe_start);
-       mutex_unlock(&locks_root->lock);
-       if (IS_ERR(existing)) {
-               ret = PTR_ERR(existing);
-               goto out;
-       }
-       mutex_lock(&existing->mutex);
-       *locked_ret = true;
-out:
-       btrfs_put_block_group(bg_cache);
-       return ret;
-}
-
-/*
- * Unlock a full stripe.
- *
- * NOTE: The caller must be in the same context as the corresponding
- * lock_full_stripe() call.
- *
- * Return 0 if we unlocked the full stripe without problems.
- * Return <0 on error.
- */
-static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
-                             bool locked)
-{
-       struct btrfs_block_group *bg_cache;
-       struct btrfs_full_stripe_locks_tree *locks_root;
-       struct full_stripe_lock *fstripe_lock;
-       u64 fstripe_start;
-       bool freeit = false;
-       int ret = 0;
-
-       /* If we didn't acquire full stripe lock, no need to continue */
-       if (!locked)
-               return 0;
-
-       bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
-       if (!bg_cache) {
-               ASSERT(0);
-               return -ENOENT;
-       }
-       if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
-               goto out;
-
-       locks_root = &bg_cache->full_stripe_locks_root;
-       fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
-
-       mutex_lock(&locks_root->lock);
-       fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
-       /* Unpaired unlock_full_stripe() detected */
-       if (!fstripe_lock) {
-               WARN_ON(1);
-               ret = -ENOENT;
-               mutex_unlock(&locks_root->lock);
-               goto out;
-       }
-
-       if (fstripe_lock->refs == 0) {
-               WARN_ON(1);
-               btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
-                       fstripe_lock->logical);
-       } else {
-               fstripe_lock->refs--;
-       }
-
-       if (fstripe_lock->refs == 0) {
-               rb_erase(&fstripe_lock->node, &locks_root->root);
-               freeit = true;
-       }
-       mutex_unlock(&locks_root->lock);
-
-       mutex_unlock(&fstripe_lock->mutex);
-       if (freeit)
-               kfree(fstripe_lock);
-out:
-       btrfs_put_block_group(bg_cache);
-       return ret;
-}
-
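The comments above pin down the locking contract: lock_full_stripe() and unlock_full_stripe() had to be paired in the same context, with the bool handed from one to the other. A minimal sketch of a caller (the function name is illustrative, not from the patch):

/* Illustrative caller of the full stripe locking API removed above. */
static int repair_with_full_stripe_lock(struct btrfs_fs_info *fs_info,
					u64 logical)
{
	bool full_stripe_locked;
	int ret;

	/* No-op for non-RAID56 block groups; may block on another thread. */
	ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
	if (ret < 0)
		return ret;

	/* ... recheck and repair while RAID56 recovery is excluded ... */

	/* Must run in the same context, with the same @logical. */
	return unlock_full_stripe(fs_info, logical, full_stripe_locked);
}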
 static void scrub_free_csums(struct scrub_ctx *sctx)
 {
        while (!list_empty(&sctx->csum_list)) {
@@ -1101,444 +851,6 @@ out:
        btrfs_free_path(path);
 }
 
-static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
-{
-       scrub_print_common_warning(errstr, sblock->dev,
-                       sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER,
-                       sblock->logical, sblock->physical);
-}
-
-static inline void scrub_get_recover(struct scrub_recover *recover)
-{
-       refcount_inc(&recover->refs);
-}
-
-static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
-                                    struct scrub_recover *recover)
-{
-       if (refcount_dec_and_test(&recover->refs)) {
-               btrfs_bio_counter_dec(fs_info);
-               btrfs_put_bioc(recover->bioc);
-               kfree(recover);
-       }
-}
-
-/*
- * scrub_handle_errored_block gets called when either verification of the
- * sectors failed or the bio failed to read, e.g. with EIO. In the latter
- * case, this function handles all sectors in the bio, even though only one
- * may be bad.
- * The goal of this function is to repair the errored block by using the
- * contents of one of the mirrors.
- */
-static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
-{
-       struct scrub_ctx *sctx = sblock_to_check->sctx;
-       struct btrfs_device *dev = sblock_to_check->dev;
-       struct btrfs_fs_info *fs_info;
-       u64 logical;
-       unsigned int failed_mirror_index;
-       unsigned int is_metadata;
-       unsigned int have_csum;
-       /* One scrub_block for each mirror */
-       struct scrub_block *sblocks_for_recheck[BTRFS_MAX_MIRRORS] = { 0 };
-       struct scrub_block *sblock_bad;
-       int ret;
-       int mirror_index;
-       int sector_num;
-       int success;
-       bool full_stripe_locked;
-       unsigned int nofs_flag;
-       static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
-                                     DEFAULT_RATELIMIT_BURST);
-
-       BUG_ON(sblock_to_check->sector_count < 1);
-       fs_info = sctx->fs_info;
-       if (sblock_to_check->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
-               /*
-                * If we find an error in a super block, we just report it;
-                * super blocks get rewritten with the next transaction
-                * commit anyway.
-                */
-               scrub_print_warning("super block error", sblock_to_check);
-               spin_lock(&sctx->stat_lock);
-               ++sctx->stat.super_errors;
-               spin_unlock(&sctx->stat_lock);
-               btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
-               return 0;
-       }
-       logical = sblock_to_check->logical;
-       ASSERT(sblock_to_check->mirror_num);
-       failed_mirror_index = sblock_to_check->mirror_num - 1;
-       is_metadata = !(sblock_to_check->sectors[0]->flags &
-                       BTRFS_EXTENT_FLAG_DATA);
-       have_csum = sblock_to_check->sectors[0]->have_csum;
-
-       if (!sctx->is_dev_replace && btrfs_repair_one_zone(fs_info, logical))
-               return 0;
-
-       /*
-        * We must use GFP_NOFS because the scrub task might be waiting for a
-        * worker task executing this function and in turn a transaction commit
-        * might be waiting for the scrub task to pause (which needs to wait
-        * for all the worker tasks to complete before pausing).
-        * We do allocations in the workers through insert_full_stripe_lock()
-        * and scrub_add_sector_to_wr_bio(), which happens down the call chain of
-        * this function.
-        */
-       nofs_flag = memalloc_nofs_save();
-       /*
-        * For RAID5/6, a race can happen with the scrub thread of a
-        * different device: on data corruption, the parity and data threads
-        * will both try to recover the data.
-        * The race can lead to a doubly counted csum error, or even an
-        * unrecoverable error.
-        */
-       ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
-       if (ret < 0) {
-               memalloc_nofs_restore(nofs_flag);
-               spin_lock(&sctx->stat_lock);
-               if (ret == -ENOMEM)
-                       sctx->stat.malloc_errors++;
-               sctx->stat.read_errors++;
-               sctx->stat.uncorrectable_errors++;
-               spin_unlock(&sctx->stat_lock);
-               return ret;
-       }
-
-       /*
-        * Read all mirrors one after the other. This includes re-reading
-        * the extent or metadata block that failed (that was
-        * the cause that this fixup code is called) another time,
-        * sector by sector this time in order to know which sectors
-        * caused I/O errors and which ones are good (for all mirrors).
-        * The goal is to handle the situation when more than one
-        * mirror contains I/O errors, but the errors do not
-        * overlap, i.e. the data can be repaired by selecting the
-        * sectors from those mirrors without I/O error on the
-        * particular sectors. One example (with blocks >= 2 * sectorsize)
-        * would be that mirror #1 has an I/O error on the first sector,
-        * the second sector is good, and mirror #2 has an I/O error on
-        * the second sector, but the first sector is good.
-        * Then the first sector of the first mirror can be repaired by
-        * taking the first sector of the second mirror, and the
-        * second sector of the second mirror can be repaired by
-        * copying the contents of the 2nd sector of the 1st mirror.
-        * One more note: if the sectors of one mirror contain I/O
-        * errors, the checksum cannot be verified. In order to get
-        * the best data for repairing, the first attempt is to find
-        * a mirror without I/O errors and with a validated checksum.
-        * Only if this is not possible, the sectors are picked from
-        * mirrors with I/O errors without considering the checksum.
-        * If the latter is the case, at the end, the checksum of the
-        * repaired area is verified in order to correctly maintain
-        * the statistics.
-        */
-       for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS; mirror_index++) {
-               /*
-                * Note: the two members refs and outstanding_sectors are not
-                * used in the blocks that are used for the recheck procedure.
-                *
-                * But alloc_scrub_block() will initialize sblock::refs anyway,
-                * so we can use scrub_block_put() to clean them up.
-                *
-                * And here we don't set up the physical/dev for the sblock yet;
-                * they will be correctly initialized in scrub_setup_recheck_block().
-                */
-               sblocks_for_recheck[mirror_index] = alloc_scrub_block(sctx, NULL,
-                                                       logical, 0, 0, mirror_index);
-               if (!sblocks_for_recheck[mirror_index]) {
-                       spin_lock(&sctx->stat_lock);
-                       sctx->stat.malloc_errors++;
-                       sctx->stat.read_errors++;
-                       sctx->stat.uncorrectable_errors++;
-                       spin_unlock(&sctx->stat_lock);
-                       btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
-                       goto out;
-               }
-       }
-
-       /* Setup the context, map the logical blocks and alloc the sectors */
-       ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
-       if (ret) {
-               spin_lock(&sctx->stat_lock);
-               sctx->stat.read_errors++;
-               sctx->stat.uncorrectable_errors++;
-               spin_unlock(&sctx->stat_lock);
-               btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
-               goto out;
-       }
-       BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
-       sblock_bad = sblocks_for_recheck[failed_mirror_index];
-
-       /* build and submit the bios for the failed mirror, check checksums */
-       scrub_recheck_block(fs_info, sblock_bad, 1);
-
-       if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
-           sblock_bad->no_io_error_seen) {
-               /*
-                * The error disappeared after reading sector by sector, or
-                * the area was part of a huge bio and other parts of the
-                * bio caused I/O errors, or the block layer merged several
-                * read requests into one and the error is caused by a
-                * different bio (usually one of the latter two cases is
-                * the cause)
-                */
-               spin_lock(&sctx->stat_lock);
-               sctx->stat.unverified_errors++;
-               sblock_to_check->data_corrected = 1;
-               spin_unlock(&sctx->stat_lock);
-
-               goto out;
-       }
-
-       if (!sblock_bad->no_io_error_seen) {
-               spin_lock(&sctx->stat_lock);
-               sctx->stat.read_errors++;
-               spin_unlock(&sctx->stat_lock);
-               if (__ratelimit(&rs))
-                       scrub_print_warning("i/o error", sblock_to_check);
-               btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
-       } else if (sblock_bad->checksum_error) {
-               spin_lock(&sctx->stat_lock);
-               sctx->stat.csum_errors++;
-               spin_unlock(&sctx->stat_lock);
-               if (__ratelimit(&rs))
-                       scrub_print_warning("checksum error", sblock_to_check);
-               btrfs_dev_stat_inc_and_print(dev,
-                                            BTRFS_DEV_STAT_CORRUPTION_ERRS);
-       } else if (sblock_bad->header_error) {
-               spin_lock(&sctx->stat_lock);
-               sctx->stat.verify_errors++;
-               spin_unlock(&sctx->stat_lock);
-               if (__ratelimit(&rs))
-                       scrub_print_warning("checksum/header error",
-                                           sblock_to_check);
-               if (sblock_bad->generation_error)
-                       btrfs_dev_stat_inc_and_print(dev,
-                               BTRFS_DEV_STAT_GENERATION_ERRS);
-               else
-                       btrfs_dev_stat_inc_and_print(dev,
-                               BTRFS_DEV_STAT_CORRUPTION_ERRS);
-       }
-
-       if (sctx->readonly) {
-               ASSERT(!sctx->is_dev_replace);
-               goto out;
-       }
-
-       /*
-        * now build and submit the bios for the other mirrors, check
-        * checksums.
-        * First try to pick the mirror which is completely without I/O
-        * errors and also does not have a checksum error.
-        * If one is found, and if a checksum is present, the full block
-        * that is known to contain an error is rewritten. Afterwards
-        * the block is known to be corrected.
-        * If a mirror is found which is completely correct, and no
-        * checksum is present, only those sectors are rewritten that had
-        * an I/O error in the block to be repaired, since it cannot be
-        * determined which copy of the other sectors is better (and it
-        * could happen otherwise that a correct sector would be
-        * overwritten by a bad one).
-        */
-       for (mirror_index = 0; ; mirror_index++) {
-               struct scrub_block *sblock_other;
-
-               if (mirror_index == failed_mirror_index)
-                       continue;
-
-               /* RAID56 can have more mirrors than BTRFS_MAX_MIRRORS */
-               if (!scrub_is_page_on_raid56(sblock_bad->sectors[0])) {
-                       if (mirror_index >= BTRFS_MAX_MIRRORS)
-                               break;
-                       if (!sblocks_for_recheck[mirror_index]->sector_count)
-                               break;
-
-                       sblock_other = sblocks_for_recheck[mirror_index];
-               } else {
-                       struct scrub_recover *r = sblock_bad->sectors[0]->recover;
-                       int max_allowed = r->bioc->num_stripes - r->bioc->replace_nr_stripes;
-
-                       if (mirror_index >= max_allowed)
-                               break;
-                       if (!sblocks_for_recheck[1]->sector_count)
-                               break;
-
-                       ASSERT(failed_mirror_index == 0);
-                       sblock_other = sblocks_for_recheck[1];
-                       sblock_other->mirror_num = 1 + mirror_index;
-               }
-
-               /* build and submit the bios, check checksums */
-               scrub_recheck_block(fs_info, sblock_other, 0);
-
-               if (!sblock_other->header_error &&
-                   !sblock_other->checksum_error &&
-                   sblock_other->no_io_error_seen) {
-                       if (sctx->is_dev_replace) {
-                               goto corrected_error;
-                       } else {
-                               ret = scrub_repair_block_from_good_copy(
-                                               sblock_bad, sblock_other);
-                               if (!ret)
-                                       goto corrected_error;
-                       }
-               }
-       }
-
-       if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
-               goto did_not_correct_error;
-
-       /*
-        * In case of I/O errors in the area that is supposed to be
-        * repaired, continue by picking good copies of those sectors.
-        * Select the good sectors from mirrors to rewrite bad sectors from
-        * the area to fix. Afterwards verify the checksum of the block
-        * that is supposed to be repaired. This verification step is
-        * only done for the purpose of statistics counting and for the
-        * final scrub report on whether errors remain.
-        * A perfect algorithm could make use of the checksum and try
-        * all possible combinations of sectors from the different mirrors
-        * until the checksum verification succeeds. For example, when
-        * the 2nd sector of mirror #1 faces I/O errors, and the 2nd sector
-        * of mirror #2 is readable but the final checksum test fails,
-        * then the 2nd sector of mirror #3 could be tried, to see whether
-        * the final checksum then succeeds. But this would be a rare
-        * exception and is therefore not implemented. At least the good
-        * copy is never overwritten.
-        * A more useful improvement would be to pick the sectors
-        * without I/O error based on sector sizes (512 bytes on legacy
-        * disks) instead of on sectorsize. Then maybe 512 byte of one
-        * mirror could be repaired by taking 512 byte of a different
-        * mirror, even if other 512 byte sectors in the same sectorsize
-        * area are unreadable.
-        */
-       success = 1;
-       for (sector_num = 0; sector_num < sblock_bad->sector_count;
-            sector_num++) {
-               struct scrub_sector *sector_bad = sblock_bad->sectors[sector_num];
-               struct scrub_block *sblock_other = NULL;
-
-               /* Skip no-io-error sectors in scrub */
-               if (!sector_bad->io_error && !sctx->is_dev_replace)
-                       continue;
-
-               if (scrub_is_page_on_raid56(sblock_bad->sectors[0])) {
-                       /*
-                        * In case of dev replace, if the raid56 rebuild
-                        * process didn't produce correct data, then copy the
-                        * content of sblock_bad to make sure the target device
-                        * is identical to the source device, instead of
-                        * writing garbage data from the sblocks_for_recheck
-                        * array to the target device.
-                        */
-                       sblock_other = NULL;
-               } else if (sector_bad->io_error) {
-                       /* Try to find no-io-error sector in mirrors */
-                       for (mirror_index = 0;
-                            mirror_index < BTRFS_MAX_MIRRORS &&
-                            sblocks_for_recheck[mirror_index]->sector_count > 0;
-                            mirror_index++) {
-                               if (!sblocks_for_recheck[mirror_index]->
-                                   sectors[sector_num]->io_error) {
-                                       sblock_other = sblocks_for_recheck[mirror_index];
-                                       break;
-                               }
-                       }
-                       if (!sblock_other)
-                               success = 0;
-               }
-
-               if (sctx->is_dev_replace) {
-                       /*
-                        * Did not find a mirror to fetch the sector from.
-                        * scrub_write_sector_to_dev_replace() handles this
-                        * case (sector->io_error) by filling the block with
-                        * zeros before submitting the write request
-                        */
-                       if (!sblock_other)
-                               sblock_other = sblock_bad;
-               } else if (sblock_other) {
-                       ret = scrub_repair_sector_from_good_copy(sblock_bad,
-                                                                sblock_other,
-                                                                sector_num, 0);
-                       if (ret == 0)
-                               sector_bad->io_error = 0;
-                       else
-                               success = 0;
-               }
-       }
-
-       if (success && !sctx->is_dev_replace) {
-               if (is_metadata || have_csum) {
-                       /*
-                        * need to verify the checksum now that all
-                        * sectors on disk are repaired (the write
-                        * request for data to be repaired is on its way).
-                        * Just be lazy and use scrub_recheck_block()
-                        * which re-reads the data before the checksum
-                        * is verified, but most likely the data comes out
-                        * of the page cache.
-                        */
-                       scrub_recheck_block(fs_info, sblock_bad, 1);
-                       if (!sblock_bad->header_error &&
-                           !sblock_bad->checksum_error &&
-                           sblock_bad->no_io_error_seen)
-                               goto corrected_error;
-                       else
-                               goto did_not_correct_error;
-               } else {
-corrected_error:
-                       spin_lock(&sctx->stat_lock);
-                       sctx->stat.corrected_errors++;
-                       sblock_to_check->data_corrected = 1;
-                       spin_unlock(&sctx->stat_lock);
-                       btrfs_err_rl_in_rcu(fs_info,
-                               "fixed up error at logical %llu on dev %s",
-                               logical, btrfs_dev_name(dev));
-               }
-       } else {
-did_not_correct_error:
-               spin_lock(&sctx->stat_lock);
-               sctx->stat.uncorrectable_errors++;
-               spin_unlock(&sctx->stat_lock);
-               btrfs_err_rl_in_rcu(fs_info,
-                       "unable to fixup (regular) error at logical %llu on dev %s",
-                       logical, btrfs_dev_name(dev));
-       }
-
-out:
-       for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS; mirror_index++) {
-               struct scrub_block *sblock = sblocks_for_recheck[mirror_index];
-               struct scrub_recover *recover;
-               int sector_index;
-
-               /* Not allocated, continue checking the next mirror */
-               if (!sblock)
-                       continue;
-
-               for (sector_index = 0; sector_index < sblock->sector_count;
-                    sector_index++) {
-                       /*
-                        * Here we just clean up the recover; each sector will
-                        * be properly cleaned up by a later scrub_block_put()
-                        */
-                       recover = sblock->sectors[sector_index]->recover;
-                       if (recover) {
-                               scrub_put_recover(fs_info, recover);
-                               sblock->sectors[sector_index]->recover = NULL;
-                       }
-               }
-               scrub_block_put(sblock);
-       }
-
-       ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
-       memalloc_nofs_restore(nofs_flag);
-       if (ret < 0)
-               return ret;
-       return 0;
-}
-
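Boiled down, the repair order that scrub_handle_errored_block() implemented is: re-read the bad mirror sector by sector, prefer a mirror that is clean as a whole block, and only then stitch individual good sectors together. A summary sketch of that order, not the real control flow; block_is_clean() is a hypothetical stand-in for the header/checksum/io-error checks:

/* Summary sketch of the removed repair flow; error handling trimmed. */
static int repair_order_sketch(struct scrub_block *bad,
			       struct scrub_block *mirrors[], int nmirrors)
{
	struct btrfs_fs_info *fs_info = bad->sctx->fs_info;
	int i, s;

	/* Re-read the failed mirror sector by sector first. */
	scrub_recheck_block(fs_info, bad, 1);
	if (block_is_clean(bad))	/* the error vanished on re-read */
		return 0;

	/* 1) Prefer a mirror that is clean as a whole block. */
	for (i = 0; i < nmirrors; i++) {
		scrub_recheck_block(fs_info, mirrors[i], 0);
		if (block_is_clean(mirrors[i]))
			return scrub_repair_block_from_good_copy(bad,
								 mirrors[i]);
	}

	/* 2) Otherwise stitch: any mirror whose copy of sector @s read OK. */
	for (s = 0; s < bad->sector_count; s++) {
		if (!bad->sectors[s]->io_error)
			continue;
		for (i = 0; i < nmirrors; i++) {
			if (mirrors[i]->sectors[s]->io_error)
				continue;
			scrub_repair_sector_from_good_copy(bad, mirrors[i],
							   s, 0);
			break;
		}
	}
	return 0;
}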
 static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
 {
        if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
@@ -1581,224 +893,6 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
        }
 }
 
-static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
-                                    struct scrub_block *sblocks_for_recheck[])
-{
-       struct scrub_ctx *sctx = original_sblock->sctx;
-       struct btrfs_fs_info *fs_info = sctx->fs_info;
-       u64 logical = original_sblock->logical;
-       u64 length = original_sblock->sector_count << fs_info->sectorsize_bits;
-       u64 generation = original_sblock->sectors[0]->generation;
-       u64 flags = original_sblock->sectors[0]->flags;
-       u64 have_csum = original_sblock->sectors[0]->have_csum;
-       struct scrub_recover *recover;
-       struct btrfs_io_context *bioc;
-       u64 sublen;
-       u64 mapped_length;
-       u64 stripe_offset;
-       int stripe_index;
-       int sector_index = 0;
-       int mirror_index;
-       int nmirrors;
-       int ret;
-
-       while (length > 0) {
-               sublen = min_t(u64, length, fs_info->sectorsize);
-               mapped_length = sublen;
-               bioc = NULL;
-
-               /*
-                * With a length of sectorsize, each returned stripe represents
-                * one mirror
-                */
-               btrfs_bio_counter_inc_blocked(fs_info);
-               ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
-                                      logical, &mapped_length, &bioc);
-               if (ret || !bioc || mapped_length < sublen) {
-                       btrfs_put_bioc(bioc);
-                       btrfs_bio_counter_dec(fs_info);
-                       return -EIO;
-               }
-
-               recover = kzalloc(sizeof(struct scrub_recover), GFP_KERNEL);
-               if (!recover) {
-                       btrfs_put_bioc(bioc);
-                       btrfs_bio_counter_dec(fs_info);
-                       return -ENOMEM;
-               }
-
-               refcount_set(&recover->refs, 1);
-               recover->bioc = bioc;
-               recover->map_length = mapped_length;
-
-               ASSERT(sector_index < SCRUB_MAX_SECTORS_PER_BLOCK);
-
-               nmirrors = min(scrub_nr_raid_mirrors(bioc), BTRFS_MAX_MIRRORS);
-
-               for (mirror_index = 0; mirror_index < nmirrors;
-                    mirror_index++) {
-                       struct scrub_block *sblock;
-                       struct scrub_sector *sector;
-
-                       sblock = sblocks_for_recheck[mirror_index];
-                       sblock->sctx = sctx;
-
-                       sector = alloc_scrub_sector(sblock, logical);
-                       if (!sector) {
-                               spin_lock(&sctx->stat_lock);
-                               sctx->stat.malloc_errors++;
-                               spin_unlock(&sctx->stat_lock);
-                               scrub_put_recover(fs_info, recover);
-                               return -ENOMEM;
-                       }
-                       sector->flags = flags;
-                       sector->generation = generation;
-                       sector->have_csum = have_csum;
-                       if (have_csum)
-                               memcpy(sector->csum,
-                                      original_sblock->sectors[0]->csum,
-                                      sctx->fs_info->csum_size);
-
-                       scrub_stripe_index_and_offset(logical,
-                                                     bioc->map_type,
-                                                     bioc->full_stripe_logical,
-                                                     bioc->num_stripes -
-                                                     bioc->replace_nr_stripes,
-                                                     mirror_index,
-                                                     &stripe_index,
-                                                     &stripe_offset);
-                       /*
-                        * We're at the first sector, so also populate the
-                        * @sblock physical and dev members.
-                        */
-                       if (sector_index == 0) {
-                               sblock->physical =
-                                       bioc->stripes[stripe_index].physical +
-                                       stripe_offset;
-                               sblock->dev = bioc->stripes[stripe_index].dev;
-                               sblock->physical_for_dev_replace =
-                                       original_sblock->physical_for_dev_replace;
-                       }
-
-                       BUG_ON(sector_index >= original_sblock->sector_count);
-                       scrub_get_recover(recover);
-                       sector->recover = recover;
-               }
-               scrub_put_recover(fs_info, recover);
-               length -= sublen;
-               logical += sublen;
-               sector_index++;
-       }
-
-       return 0;
-}
-
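The core trick in scrub_setup_recheck_block() is that mapping a single sector with BTRFS_MAP_GET_READ_MIRRORS makes every returned stripe correspond to one mirror. A minimal sketch of just that lookup, reusing the calls from the removed code (the function name is illustrative):

/* Sketch: enumerate the mirrors backing one sector at @logical. */
static int count_mirrors_sketch(struct btrfs_fs_info *fs_info, u64 logical)
{
	struct btrfs_io_context *bioc = NULL;
	u64 mapped_length = fs_info->sectorsize;
	int nmirrors;
	int ret;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
			       logical, &mapped_length, &bioc);
	if (ret || !bioc || mapped_length < fs_info->sectorsize) {
		btrfs_put_bioc(bioc);
		btrfs_bio_counter_dec(fs_info);
		return ret ? ret : -EIO;
	}

	/* For RAID56 this counts data stripes plus parities, see above. */
	nmirrors = min(scrub_nr_raid_mirrors(bioc), BTRFS_MAX_MIRRORS);

	btrfs_put_bioc(bioc);
	btrfs_bio_counter_dec(fs_info);
	return nmirrors;
}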
-static void scrub_bio_wait_endio(struct bio *bio)
-{
-       complete(bio->bi_private);
-}
-
-static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
-                                       struct bio *bio,
-                                       struct scrub_sector *sector)
-{
-       DECLARE_COMPLETION_ONSTACK(done);
-
-       bio->bi_iter.bi_sector = (sector->offset + sector->sblock->logical) >>
-                                SECTOR_SHIFT;
-       bio->bi_private = &done;
-       bio->bi_end_io = scrub_bio_wait_endio;
-       raid56_parity_recover(bio, sector->recover->bioc, sector->sblock->mirror_num);
-
-       wait_for_completion_io(&done);
-       return blk_status_to_errno(bio->bi_status);
-}
-
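scrub_submit_raid56_bio_wait() is the usual kernel idiom for turning an asynchronous submission synchronous: point bi_private at an on-stack completion, complete it from the end_io handler, and sleep until it fires. A hedged sketch of the same shape with a plain read bio (assumes a valid @bdev and @page; for ordinary bios submit_bio_wait() already packages this, while raid56_parity_recover() needs it done by hand):

/*
 * Sketch of the on-stack completion idiom with a plain read bio.
 * Assumes <linux/bio.h> and <linux/completion.h>.
 */
static void sync_read_endio(struct bio *bio)
{
	complete(bio->bi_private);
}

static int sync_read_page(struct block_device *bdev, struct page *page,
			  sector_t sector)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bio *bio;
	int ret;

	bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_NOFS);
	bio->bi_iter.bi_sector = sector;
	__bio_add_page(bio, page, PAGE_SIZE, 0);
	bio->bi_private = &done;
	bio->bi_end_io = sync_read_endio;

	submit_bio(bio);
	wait_for_completion_io(&done);
	ret = blk_status_to_errno(bio->bi_status);
	bio_put(bio);
	return ret;
}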
-static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
-                                         struct scrub_block *sblock)
-{
-       struct scrub_sector *first_sector = sblock->sectors[0];
-       struct bio *bio;
-       int i;
-
-       /* All sectors in sblock belong to the same stripe on the same device. */
-       ASSERT(sblock->dev);
-       if (!sblock->dev->bdev)
-               goto out;
-
-       bio = bio_alloc(sblock->dev->bdev, BIO_MAX_VECS, REQ_OP_READ, GFP_NOFS);
-
-       for (i = 0; i < sblock->sector_count; i++) {
-               struct scrub_sector *sector = sblock->sectors[i];
-
-               bio_add_scrub_sector(bio, sector, fs_info->sectorsize);
-       }
-
-       if (scrub_submit_raid56_bio_wait(fs_info, bio, first_sector)) {
-               bio_put(bio);
-               goto out;
-       }
-
-       bio_put(bio);
-
-       scrub_recheck_block_checksum(sblock);
-
-       return;
-out:
-       for (i = 0; i < sblock->sector_count; i++)
-               sblock->sectors[i]->io_error = 1;
-
-       sblock->no_io_error_seen = 0;
-}
-
-/*
- * This function will check the on-disk data for checksum errors, header errors,
- * and read I/O errors. If any I/O errors happen, the exact sectors which are
- * errored are marked as being bad. The goal is to enable scrub to take those
- * sectors that are not errored from all the mirrors so that the sectors that
- * are errored in the just handled mirror can be repaired.
- */
-static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
-                               struct scrub_block *sblock,
-                               int retry_failed_mirror)
-{
-       int i;
-
-       sblock->no_io_error_seen = 1;
-
-       /* Shortcut for RAID56 */
-       if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->sectors[0]))
-               return scrub_recheck_block_on_raid56(fs_info, sblock);
-
-       for (i = 0; i < sblock->sector_count; i++) {
-               struct scrub_sector *sector = sblock->sectors[i];
-               struct bio bio;
-               struct bio_vec bvec;
-
-               if (!sblock->dev->bdev) {
-                       sector->io_error = 1;
-                       sblock->no_io_error_seen = 0;
-                       continue;
-               }
-
-               bio_init(&bio, sblock->dev->bdev, &bvec, 1, REQ_OP_READ);
-               bio_add_scrub_sector(&bio, sector, fs_info->sectorsize);
-               bio.bi_iter.bi_sector = (sblock->physical + sector->offset) >>
-                                       SECTOR_SHIFT;
-
-               btrfsic_check_bio(&bio);
-               if (submit_bio_wait(&bio)) {
-                       sector->io_error = 1;
-                       sblock->no_io_error_seen = 0;
-               }
-
-               bio_uninit(&bio);
-       }
-
-       if (sblock->no_io_error_seen)
-               scrub_recheck_block_checksum(sblock);
-}
-
 static inline int scrub_check_fsid(u8 fsid[], struct scrub_sector *sector)
 {
        struct btrfs_fs_devices *fs_devices = sector->sblock->dev->fs_devices;
@@ -1808,77 +902,6 @@ static inline int scrub_check_fsid(u8 fsid[], struct scrub_sector *sector)
        return !ret;
 }
 
-static void scrub_recheck_block_checksum(struct scrub_block *sblock)
-{
-       sblock->header_error = 0;
-       sblock->checksum_error = 0;
-       sblock->generation_error = 0;
-
-       if (sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_DATA)
-               scrub_checksum_data(sblock);
-       else
-               scrub_checksum_tree_block(sblock);
-}
-
-static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
-                                            struct scrub_block *sblock_good)
-{
-       int i;
-       int ret = 0;
-
-       for (i = 0; i < sblock_bad->sector_count; i++) {
-               int ret_sub;
-
-               ret_sub = scrub_repair_sector_from_good_copy(sblock_bad,
-                                                            sblock_good, i, 1);
-               if (ret_sub)
-                       ret = ret_sub;
-       }
-
-       return ret;
-}
-
-static int scrub_repair_sector_from_good_copy(struct scrub_block *sblock_bad,
-                                             struct scrub_block *sblock_good,
-                                             int sector_num, int force_write)
-{
-       struct scrub_sector *sector_bad = sblock_bad->sectors[sector_num];
-       struct scrub_sector *sector_good = sblock_good->sectors[sector_num];
-       struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;
-       const u32 sectorsize = fs_info->sectorsize;
-
-       if (force_write || sblock_bad->header_error ||
-           sblock_bad->checksum_error || sector_bad->io_error) {
-               struct bio bio;
-               struct bio_vec bvec;
-               int ret;
-
-               if (!sblock_bad->dev->bdev) {
-                       btrfs_warn_rl(fs_info,
-                               "scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
-                       return -EIO;
-               }
-
-               bio_init(&bio, sblock_bad->dev->bdev, &bvec, 1, REQ_OP_WRITE);
-               bio.bi_iter.bi_sector = (sblock_bad->physical +
-                                        sector_bad->offset) >> SECTOR_SHIFT;
-               bio_add_scrub_sector(&bio, sector_good, sectorsize);
-
-               btrfsic_check_bio(&bio);
-               ret = submit_bio_wait(&bio);
-               bio_uninit(&bio);
-
-               if (ret) {
-                       btrfs_dev_stat_inc_and_print(sblock_bad->dev,
-                               BTRFS_DEV_STAT_WRITE_ERRS);
-                       atomic64_inc(&fs_info->dev_replace.num_write_errors);
-                       return -EIO;
-               }
-       }
-
-       return 0;
-}
-
 static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
 {
        int ret = 0;
@@ -1934,9 +957,6 @@ static int scrub_checksum(struct scrub_block *sblock)
                ret = scrub_checksum_super(sblock);
        else
                WARN_ON(1);
-       if (ret)
-               scrub_handle_errored_block(sblock);
-
        return ret;
 }
 
@@ -2916,16 +1936,13 @@ static void scrub_bio_end_io_worker(struct work_struct *work)
 
 static void scrub_block_complete(struct scrub_block *sblock)
 {
-       if (!sblock->no_io_error_seen) {
-               scrub_handle_errored_block(sblock);
-       } else {
+       if (sblock->no_io_error_seen)
                /*
                 * In the dev-replace case: if the block has a checksum
                 * error it is written via the repair mechanism, otherwise
                 * it is written here.
                 */
                scrub_checksum(sblock);
-       }
 }
 
 static void drop_csum_range(struct scrub_ctx *sctx, struct btrfs_ordered_sum *sum)