Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

[sfrench/cifs-2.6.git] / drivers / md / raid5.c
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index 2ac2e56a1a40ec508b8d7f6ec6c2d449d7777861..467c16982d02e541bc47f4e32c0c90939fb68d52 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -405,6 +405,8 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
         if (newsize <= conf->pool_size)
                 return 0; /* never bother to shrink */
  
+       md_allow_write(conf->mddev);
+
         /* Step 1 */
         sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
                                sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
@@ -544,35 +546,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
         }
  
         if (uptodate) {
-#if 0
-               struct bio *bio;
-               unsigned long flags;
-               spin_lock_irqsave(&conf->device_lock, flags);
-               /* we can return a buffer if we bypassed the cache or
-                * if the top buffer is not in highmem.  If there are
-                * multiple buffers, leave the extra work to
-                * handle_stripe
-                */
-               buffer = sh->bh_read[i];
-               if (buffer &&
-                   (!PageHighMem(buffer->b_page)
-                    || buffer->b_page == bh->b_page )
-                       ) {
-                       sh->bh_read[i] = buffer->b_reqnext;
-                       buffer->b_reqnext = NULL;
-               } else
-                       buffer = NULL;
-               spin_unlock_irqrestore(&conf->device_lock, flags);
-               if (sh->bh_page[i]==bh->b_page)
-                       set_buffer_uptodate(bh);
-               if (buffer) {
-                       if (buffer->b_page != bh->b_page)
-                               memcpy(buffer->b_data, bh->b_data, bh->b_size);
-                       buffer->b_end_io(buffer, 1);
-               }
-#else
                 set_bit(R5_UPTODATE, &sh->dev[i].flags);
-#endif
                 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
                         rdev = conf->disks[i].rdev;
                         printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n",
@@ -618,14 +592,6 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
                 }
         }
         rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
-#if 0
-       /* must restore b_page before unlocking buffer... */
-       if (sh->bh_page[i] != bh->b_page) {
-               bh->b_page = sh->bh_page[i];
-               bh->b_data = page_address(bh->b_page);
-               clear_buffer_uptodate(bh);
-       }
-#endif
         clear_bit(R5_LOCKED, &sh->dev[i].flags);
         set_bit(STRIPE_HANDLE, &sh->state);
         release_stripe(sh);
@@ -823,7 +789,8 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
  static sector_t compute_blocknr(struct stripe_head *sh, int i)
  {
         raid5_conf_t *conf = sh->raid_conf;
-       int raid_disks = sh->disks, data_disks = raid_disks - 1;
+       int raid_disks = sh->disks;
+       int data_disks = raid_disks - conf->max_degraded;
         sector_t new_sector = sh->sector, check;
         int sectors_per_chunk = conf->chunk_size >> 9;
         sector_t stripe;
@@ -859,7 +826,6 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
                 }
                 break;
         case 6:
-               data_disks = raid_disks - 2;
                 if (i == raid6_next_disk(sh->pd_idx, raid_disks))
                         return 0; /* It is the Q disk */
                 switch (conf->algorithm) {
@@ -1355,8 +1321,10 @@ static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks)
         int pd_idx, dd_idx;
         int chunk_offset = sector_div(stripe, sectors_per_chunk);
  
-       raid5_compute_sector(stripe*(disks-1)*sectors_per_chunk
-                            + chunk_offset, disks, disks-1, &dd_idx, &pd_idx, conf);
+       raid5_compute_sector(stripe * (disks - conf->max_degraded)
+                            *sectors_per_chunk + chunk_offset,
+                            disks, disks - conf->max_degraded,
+                            &dd_idx, &pd_idx, conf);
         return pd_idx;
  }
  
@@ -1617,15 +1585,6 @@ static void handle_stripe5(struct stripe_head *sh)
                                 } else if (test_bit(R5_Insync, &dev->flags)) {
                                         set_bit(R5_LOCKED, &dev->flags);
                                         set_bit(R5_Wantread, &dev->flags);
-#if 0
-                                       /* if I am just reading this block and we don't have
-                                          a failed drive, or any pending writes then sidestep the cache */
-                                       if (sh->bh_read[i] && !sh->bh_read[i]->b_reqnext &&
-                                           ! syncing && !failed && !to_write) {
-                                               sh->bh_cache[i]->b_page =  sh->bh_read[i]->b_page;
-                                               sh->bh_cache[i]->b_data =  sh->bh_read[i]->b_data;
-                                       }
-#endif
                                         locked++;
                                         PRINTK("Reading block %d (sync=%d)\n", 
                                                 i, syncing);
@@ -1643,9 +1602,6 @@ static void handle_stripe5(struct stripe_head *sh)
                         dev = &sh->dev[i];
                         if ((dev->towrite || i == sh->pd_idx) &&
                             (!test_bit(R5_LOCKED, &dev->flags) 
-#if 0
-|| sh->bh_page[i]!=bh->b_page
-#endif
                                     ) &&
                             !test_bit(R5_UPTODATE, &dev->flags)) {
                                 if (test_bit(R5_Insync, &dev->flags)
@@ -1657,9 +1613,6 @@ static void handle_stripe5(struct stripe_head *sh)
                         /* Would I have to read this buffer for reconstruct_write */
                         if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
                             (!test_bit(R5_LOCKED, &dev->flags) 
-#if 0
-|| sh->bh_page[i] != bh->b_page
-#endif
                                     ) &&
                             !test_bit(R5_UPTODATE, &dev->flags)) {
                                 if (test_bit(R5_Insync, &dev->flags)) rcw++;
@@ -1867,23 +1820,25 @@ static void handle_stripe5(struct stripe_head *sh)
                 return_bi = bi->bi_next;
                 bi->bi_next = NULL;
                 bi->bi_size = 0;
-               bi->bi_end_io(bi, bytes, 0);
+               bi->bi_end_io(bi, bytes,
+                             test_bit(BIO_UPTODATE, &bi->bi_flags)
+                               ? 0 : -EIO);
         }
         for (i=disks; i-- ;) {
                 int rw;
                 struct bio *bi;
                 mdk_rdev_t *rdev;
                 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
-                       rw = 1;
+                       rw = WRITE;
                 else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
-                       rw = 0;
+                       rw = READ;
                 else
                         continue;
   
                 bi = &sh->dev[i].req;
   
                 bi->bi_rw = rw;
-               if (rw)
+               if (rw == WRITE)
                         bi->bi_end_io = raid5_end_write_request;
                 else
                         bi->bi_end_io = raid5_end_read_request;
@@ -1919,7 +1874,7 @@ static void handle_stripe5(struct stripe_head *sh)
                                 atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
                         generic_make_request(bi);
                 } else {
-                       if (rw == 1)
+                       if (rw == WRITE)
                                 set_bit(STRIPE_DEGRADED, &sh->state);
                         PRINTK("skip op %ld on disc %d for sector %llu\n",
                                 bi->bi_rw, i, (unsigned long long)sh->sector);
@@ -2195,15 +2150,6 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                                 } else if (test_bit(R5_Insync, &dev->flags)) {
                                         set_bit(R5_LOCKED, &dev->flags);
                                         set_bit(R5_Wantread, &dev->flags);
-#if 0
-                                       /* if I am just reading this block and we don't have
-                                          a failed drive, or any pending writes then sidestep the cache */
-                                       if (sh->bh_read[i] && !sh->bh_read[i]->b_reqnext &&
-                                           ! syncing && !failed && !to_write) {
-                                               sh->bh_cache[i]->b_page =  sh->bh_read[i]->b_page;
-                                               sh->bh_cache[i]->b_data =  sh->bh_read[i]->b_data;
-                                       }
-#endif
                                         locked++;
                                         PRINTK("Reading block %d (sync=%d)\n",
                                                 i, syncing);
@@ -2222,9 +2168,6 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                         if (!test_bit(R5_OVERWRITE, &dev->flags)
                             && i != pd_idx && i != qd_idx
                             && (!test_bit(R5_LOCKED, &dev->flags)
-#if 0
-                               || sh->bh_page[i] != bh->b_page
-#endif
                                     ) &&
                             !test_bit(R5_UPTODATE, &dev->flags)) {
                                 if (test_bit(R5_Insync, &dev->flags)) rcw++;
@@ -2420,23 +2363,25 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                 return_bi = bi->bi_next;
                 bi->bi_next = NULL;
                 bi->bi_size = 0;
-               bi->bi_end_io(bi, bytes, 0);
+               bi->bi_end_io(bi, bytes,
+                             test_bit(BIO_UPTODATE, &bi->bi_flags)
+                               ? 0 : -EIO);
         }
         for (i=disks; i-- ;) {
                 int rw;
                 struct bio *bi;
                 mdk_rdev_t *rdev;
                 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
-                       rw = 1;
+                       rw = WRITE;
                 else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
-                       rw = 0;
+                       rw = READ;
                 else
                         continue;
  
                 bi = &sh->dev[i].req;
  
                 bi->bi_rw = rw;
-               if (rw)
+               if (rw == WRITE)
                         bi->bi_end_io = raid5_end_write_request;
                 else
                         bi->bi_end_io = raid5_end_read_request;
@@ -2472,7 +2417,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                                 atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
                         generic_make_request(bi);
                 } else {
-                       if (rw == 1)
+                       if (rw == WRITE)
                                 set_bit(STRIPE_DEGRADED, &sh->state);
                         PRINTK("skip op %ld on disc %d for sector %llu\n",
                                 bi->bi_rw, i, (unsigned long long)sh->sector);
@@ -2624,7 +2569,7 @@ static int raid5_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_
         unsigned int chunk_sectors = mddev->chunk_size >> 9;
         unsigned int bio_sectors = bio->bi_size >> 9;
  
-       if (bio_data_dir(bio))
+       if (bio_data_dir(bio) == WRITE)
                 return biovec->bv_len; /* always allow writes to be mergeable */
  
         max =  (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
@@ -2735,7 +2680,7 @@ static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio)
         mdk_rdev_t *rdev;
  
         if (!in_chunk_boundary(mddev, raid_bio)) {
-               printk("chunk_aligned_read : non aligned\n");
+               PRINTK("chunk_aligned_read : non aligned\n");
                 return 0;
         }
         /*
@@ -2808,6 +2753,11 @@ static int make_request(request_queue_t *q, struct bio * bi)
         disk_stat_inc(mddev->gendisk, ios[rw]);
         disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi));
  
+       if (rw == READ &&
+            mddev->reshape_position == MaxSector &&
+            chunk_aligned_read(q,bi))
+               return 0;
+
         logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
         last_sector = bi->bi_sector + (bi->bi_size>>9);
         bi->bi_next = NULL;
@@ -2915,7 +2865,9 @@ static int make_request(request_queue_t *q, struct bio * bi)
                 if ( rw == WRITE )
                         md_write_end(mddev);
                 bi->bi_size = 0;
-               bi->bi_end_io(bi, bytes, 0);
+               bi->bi_end_io(bi, bytes,
+                             test_bit(BIO_UPTODATE, &bi->bi_flags)
+                               ? 0 : -EIO);
         }
         return 0;
  }
@@ -3183,7 +3135,9 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
                 int bytes = raid_bio->bi_size;
  
                 raid_bio->bi_size = 0;
-               raid_bio->bi_end_io(raid_bio, bytes, 0);
+               raid_bio->bi_end_io(raid_bio, bytes,
+                             test_bit(BIO_UPTODATE, &raid_bio->bi_flags)
+                               ? 0 : -EIO);
         }
         if (atomic_dec_and_test(&conf->active_aligned_reads))
                 wake_up(&conf->wait_for_stripe);
@@ -3298,6 +3252,7 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
                 else
                         break;
         }
+       md_allow_write(mddev);
         while (new > conf->max_nr_stripes) {
                 if (grow_one_stripe(conf))
                         conf->max_nr_stripes++;