Merge git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index dd06e18e5aac7518191e02791073345e6a0f51cd..5f01dad4b696352f211106b4c9bd0024268913b6 100644
@@ -20,6 +20,7 @@
 #include <linux/buffer_head.h>
 #include <linux/blkdev.h>
 #include <linux/random.h>
+#include <linux/iocontext.h>
 #include <asm/div64.h>
 #include "compat.h"
 #include "ctree.h"
@@ -124,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
        return NULL;
 }
 
+static void requeue_list(struct btrfs_pending_bios *pending_bios,
+                       struct bio *head, struct bio *tail)
+{
+       struct bio *old_head;
+
+       old_head = pending_bios->head;
+       pending_bios->head = head;
+       if (pending_bios->tail)
+               tail->bi_next = old_head;
+       else
+               pending_bios->tail = tail;
+}
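requeue_list() splices a partially processed batch back onto the FRONT of the
list, so congested bios are retried before anything queued later.  A minimal
userspace sketch of the same splice, using hypothetical stand-ins (struct node,
struct plist) for bio and btrfs_pending_bios:

#include <assert.h>
#include <stddef.h>

struct node { struct node *next; int id; };
struct plist { struct node *head; struct node *tail; };

/* put the drained batch [head, tail] back on the front of the list */
static void requeue_front(struct plist *l, struct node *head, struct node *tail)
{
	struct node *old_head = l->head;

	l->head = head;
	if (l->tail)
		tail->next = old_head;	/* batch now precedes what was left */
	else
		l->tail = tail;		/* list was empty; batch is the list */
}

int main(void)
{
	struct node a = { NULL, 1 }, b = { NULL, 2 }, c = { NULL, 3 };
	struct plist l = { &c, &c };	/* one entry still queued */

	a.next = &b;			/* drained batch: a -> b */
	requeue_front(&l, &a, &b);
	assert(l.head == &a && b.next == &c && l.tail == &c);
	return 0;
}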
+
 /*
  * we try to collect pending bios for a device so we don't get a large
  * number of procs sending bios down to the same device.  This greatly
@@ -140,31 +155,44 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
        struct bio *pending;
        struct backing_dev_info *bdi;
        struct btrfs_fs_info *fs_info;
+       struct btrfs_pending_bios *pending_bios;
        struct bio *tail;
        struct bio *cur;
        int again = 0;
-       unsigned long num_run = 0;
+       unsigned long num_run;
+       unsigned long num_sync_run;
        unsigned long limit;
+       unsigned long last_waited = 0;
 
-       bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
+       bdi = blk_get_backing_dev_info(device->bdev);
        fs_info = device->dev_root->fs_info;
        limit = btrfs_async_submit_limit(fs_info);
        limit = limit * 2 / 3;
 
+       /* we want to make sure that every time we switch from the sync
+        * list to the normal list, we unplug
+        */
+       num_sync_run = 0;
+
 loop:
        spin_lock(&device->io_lock);
+       num_run = 0;
 
 loop_lock:
+
        /* take all the bios off the list at once and process them
         * later on (without the lock held).  But, remember the
         * tail and other pointers so the bios can be properly reinserted
         * into the list if we hit congestion
         */
-       pending = device->pending_bios;
-       tail = device->pending_bio_tail;
+       if (device->pending_sync_bios.head)
+               pending_bios = &device->pending_sync_bios;
+       else
+               pending_bios = &device->pending_bios;
+
+       pending = pending_bios->head;
+       tail = pending_bios->tail;
        WARN_ON(pending && !tail);
-       device->pending_bios = NULL;
-       device->pending_bio_tail = NULL;
 
        /*
         * if pending was null this time around, no bios need processing
@@ -174,16 +202,41 @@ loop_lock:
         * device->running_pending is used to synchronize with the
         * schedule_bio code.
         */
-       if (pending) {
-               again = 1;
-               device->running_pending = 1;
-       } else {
+       if (device->pending_sync_bios.head == NULL &&
+           device->pending_bios.head == NULL) {
                again = 0;
                device->running_pending = 0;
+       } else {
+               again = 1;
+               device->running_pending = 1;
        }
+
+       pending_bios->head = NULL;
+       pending_bios->tail = NULL;
+
        spin_unlock(&device->io_lock);
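The drain above always prefers the sync list, and the whole batch is detached
under io_lock so it can be processed lock-free.  A hedged sketch of that
pick-and-drain step, reusing the hypothetical struct plist/struct node from
the sketch above:

/* choose the high-priority (sync) list whenever it has entries */
static struct plist *pick_pending(struct plist *sync_list, struct plist *regular)
{
	return sync_list->head ? sync_list : regular;
}

/* detach the whole batch; caller holds the producer/consumer lock */
static struct node *drain_batch(struct plist *l, struct node **tail)
{
	struct node *head = l->head;

	*tail = l->tail;
	l->head = NULL;
	l->tail = NULL;
	return head;
}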
 
+       /*
+        * if we're doing the regular priority list, make sure we unplug
+        * for any high prio bios we've sent down
+        */
+       if (pending_bios == &device->pending_bios && num_sync_run > 0) {
+               num_sync_run = 0;
+               blk_run_backing_dev(bdi, NULL);
+       }
+
        while (pending) {
+               /* catch any sync bios queued since we last held io_lock */
+               rmb();
+               if (pending_bios != &device->pending_sync_bios &&
+                   device->pending_sync_bios.head &&
+                   num_run > 16) {
+                       cond_resched();
+                       spin_lock(&device->io_lock);
+                       requeue_list(pending_bios, pending, tail);
+                       goto loop_lock;
+               }
+
                cur = pending;
                pending = pending->bi_next;
                cur->bi_next = NULL;
@@ -194,10 +247,18 @@ loop_lock:
                        wake_up(&fs_info->async_submit_wait);
 
                BUG_ON(atomic_read(&cur->bi_cnt) == 0);
-               bio_get(cur);
+               /* test bi_rw before submit_bio(); once submitted the bio
+                * may complete and be freed
+                */
+               if (bio_sync(cur))
+                       num_sync_run++;
+
                submit_bio(cur->bi_rw, cur);
-               bio_put(cur);
                num_run++;
+
+               if (need_resched()) {
+                       if (num_sync_run) {
+                               blk_run_backing_dev(bdi, NULL);
+                               num_sync_run = 0;
+                       }
+                       cond_resched();
+               }
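This need_resched() block and the one in the congestion path below follow the
same pattern: kick any sync bios already sent down to the device before the
thread yields, so a waiter is not left behind a plugged, idle queue.  A sketch
of that pattern as a helper (maybe_yield() is hypothetical; need_resched(),
cond_resched() and blk_run_backing_dev() are the real kernel calls this diff
uses):

static void maybe_yield(struct backing_dev_info *bdi,
			unsigned long *num_sync_run)
{
	if (!need_resched())
		return;
	if (*num_sync_run) {
		/* unplug: push already-submitted sync IO down to the device */
		blk_run_backing_dev(bdi, NULL);
		*num_sync_run = 0;
	}
	cond_resched();
}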
 
                /*
                 * we made progress, there is more work to do and the bdi
@@ -206,17 +267,41 @@ loop_lock:
                 */
                if (pending && bdi_write_congested(bdi) && num_run > 16 &&
                    fs_info->fs_devices->open_devices > 1) {
-                       struct bio *old_head;
+                       struct io_context *ioc;
 
-                       spin_lock(&device->io_lock);
-
-                       old_head = device->pending_bios;
-                       device->pending_bios = pending;
-                       if (device->pending_bio_tail)
-                               tail->bi_next = old_head;
-                       else
-                               device->pending_bio_tail = tail;
+                       ioc = current->io_context;
 
+                       /*
+                        * the main goal here is to avoid blocking now if
+                        * we'll be able to submit more requests without
+                        * blocking later.
+                        *
+                        * This code does two great things: it pokes into
+                        * the elevator code from a filesystem _and_
+                        * it makes assumptions about how batching works.
+                        */
+                       if (ioc && ioc->nr_batch_requests > 0 &&
+                           time_before(jiffies, ioc->last_waited + HZ/50UL) &&
+                           (last_waited == 0 ||
+                            ioc->last_waited == last_waited)) {
+                               /*
+                                * we want to go through our batch of
+                                * requests and stop.  So, we copy out
+                                * the ioc->last_waited time and test
+                                * against it before looping
+                                */
+                               last_waited = ioc->last_waited;
+                               if (need_resched()) {
+                                       if (num_sync_run) {
+                                               blk_run_backing_dev(bdi, NULL);
+                                               num_sync_run = 0;
+                                       }
+                                       cond_resched();
+                               }
+                               continue;
+                       }
+                       spin_lock(&device->io_lock);
+                       requeue_list(pending_bios, pending, tail);
                        device->running_pending = 1;
 
                        spin_unlock(&device->io_lock);
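The batching test mirrors ioc_batching() in the block layer, where a task that
recently slept on request allocation may allocate a small batch without
blocking again; the HZ/50 window matches the block layer's BLK_BATCH_TIME of
this era.  A hedged restatement of the predicate as a helper
(may_keep_submitting() is hypothetical; io_context fields are from
linux/iocontext.h):

static int may_keep_submitting(struct io_context *ioc,
			       unsigned long *last_waited)
{
	if (!ioc || ioc->nr_batch_requests <= 0)
		return 0;			/* no batch credit left */
	if (!time_before(jiffies, ioc->last_waited + HZ / 50UL))
		return 0;			/* batch window expired */
	if (*last_waited && ioc->last_waited != *last_waited)
		return 0;			/* waited again; stop after this batch */
	*last_waited = ioc->last_waited;	/* remember the window we saw */
	return 1;
}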
@@ -224,13 +309,32 @@ loop_lock:
                        goto done;
                }
        }
+
+       if (num_sync_run) {
+               num_sync_run = 0;
+               blk_run_backing_dev(bdi, NULL);
+       }
+
+       cond_resched();
        if (again)
                goto loop;
 
        spin_lock(&device->io_lock);
-       if (device->pending_bios)
+       if (device->pending_bios.head || device->pending_sync_bios.head)
                goto loop_lock;
        spin_unlock(&device->io_lock);
+
+       /*
+        * IO has already been through a long path to get here.  Checksumming,
+        * async helper threads, perhaps compression.  We've done a pretty
+        * good job of collecting a batch of IO and should just unplug
+        * the device right away.
+        *
+        * This will help anyone who is waiting on the IO; they might have
+        * already unplugged, but done so before the bio they
+        * cared about found its way down here.
+        */
+       blk_run_backing_dev(bdi, NULL);
 done:
        return 0;
 }
@@ -1439,7 +1543,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
        btrfs_set_device_io_align(leaf, dev_item, device->io_align);
        btrfs_set_device_io_width(leaf, dev_item, device->io_width);
        btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
-       btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
+       btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
        btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
        btrfs_mark_buffer_dirty(leaf);
 
@@ -1836,14 +1940,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        device->total_bytes = new_size;
        if (device->writeable)
                device->fs_devices->total_rw_bytes -= diff;
-       ret = btrfs_update_device(trans, device);
-       if (ret) {
-               unlock_chunks(root);
-               btrfs_end_transaction(trans, root);
-               goto done;
-       }
-       WARN_ON(diff > old_total);
-       btrfs_set_super_total_bytes(super_copy, old_total - diff);
        unlock_chunks(root);
        btrfs_end_transaction(trans, root);
 
@@ -1875,7 +1971,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                length = btrfs_dev_extent_length(l, dev_extent);
 
                if (key.offset + length <= new_size)
-                       goto done;
+                       break;
 
                chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
                chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -1888,6 +1984,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                        goto done;
        }
 
+       /* Shrinking succeeded; otherwise we would have hit "done" above. */
+       trans = btrfs_start_transaction(root, 1);
+       if (!trans) {
+               ret = -ENOMEM;
+               goto done;
+       }
+       lock_chunks(root);
+
+       device->disk_total_bytes = new_size;
+       /* Now btrfs_update_device() will change the on-disk size. */
+       ret = btrfs_update_device(trans, device);
+       if (ret) {
+               unlock_chunks(root);
+               btrfs_end_transaction(trans, root);
+               goto done;
+       }
+       WARN_ON(diff > old_total);
+       btrfs_set_super_total_bytes(super_copy, old_total - diff);
+       unlock_chunks(root);
+       btrfs_end_transaction(trans, root);
 done:
        btrfs_free_path(path);
        return ret;
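The net effect of the shrink changes in this diff: device->total_bytes drops
immediately, so the allocator stops placing chunks past new_size, but the size
recorded in the device item and super block only shrinks after every dev
extent beyond new_size has been relocated.  A hedged outline of that ordering
(relocate_extents_beyond() and commit_device_item() are hypothetical stand-ins
for the relocation loop and the btrfs_update_device()/
btrfs_set_super_total_bytes() calls above):

static int shrink_device_sketch(struct btrfs_device *device, u64 new_size)
{
	int ret;

	/* phase 1: cap the in-memory size; new allocations stay below it */
	device->total_bytes = new_size;

	/* phase 2: move every extent that ends past new_size */
	ret = relocate_extents_beyond(device, new_size);	/* hypothetical */
	if (ret)
		return ret;	/* on-disk size untouched, still covers all extents */

	/* phase 3: only now persist the smaller size */
	device->disk_total_bytes = new_size;
	return commit_device_item(device);			/* hypothetical */
}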
@@ -2458,7 +2574,7 @@ again:
                        max_errors = 1;
                }
        }
-       if (multi_ret && rw == WRITE &&
+       if (multi_ret && (rw & (1 << BIO_RW)) &&
            stripes_allocated < stripes_required) {
                stripes_allocated = map->num_stripes;
                free_extent_map(em);
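The old test missed writes that carry extra flags: rw == WRITE is false for a
sync write even though it is still a write, while testing the BIO_RW bit
catches every write.  A small illustration against this era's bio.h flags:

/* WRITE == (1 << BIO_RW) in this era's headers, so an equality test
 * (rw == WRITE) fails once any extra flag rides along:
 *
 *   rw = WRITE                          strict true,  flagged true
 *   rw = WRITE | (1 << BIO_RW_SYNCIO)   strict false, flagged true
 */
static int is_write_request(int rw)
{
	return (rw & (1 << BIO_RW)) != 0;
}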
@@ -2723,6 +2839,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
                                 int rw, struct bio *bio)
 {
        int should_queue = 1;
+       struct btrfs_pending_bios *pending_bios;
 
        /* don't bother with additional async steps for reads, right now */
        if (!(rw & (1 << BIO_RW))) {
@@ -2744,13 +2861,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
        bio->bi_rw |= rw;
 
        spin_lock(&device->io_lock);
+       if (bio_sync(bio))
+               pending_bios = &device->pending_sync_bios;
+       else
+               pending_bios = &device->pending_bios;
 
-       if (device->pending_bio_tail)
-               device->pending_bio_tail->bi_next = bio;
+       if (pending_bios->tail)
+               pending_bios->tail->bi_next = bio;
 
-       device->pending_bio_tail = bio;
-       if (!device->pending_bios)
-               device->pending_bios = bio;
+       pending_bios->tail = bio;
+       if (!pending_bios->head)
+               pending_bios->head = bio;
        if (device->running_pending)
                should_queue = 0;
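The producer side is the mirror image of run_scheduled_bios(): pick the sync
or regular list, append at the tail, and only kick the worker if it is not
already running.  A sketch with the hypothetical types from the earlier
sketches (queue_node() returns nonzero when the caller should queue the
worker):

static int queue_node(struct plist *sync_list, struct plist *regular,
		      struct node *n, int is_sync, int worker_running)
{
	struct plist *l = is_sync ? sync_list : regular;

	n->next = NULL;
	if (l->tail)
		l->tail->next = n;
	l->tail = n;
	if (!l->head)		/* list was empty */
		l->head = n;

	return !worker_running;
}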
 
@@ -2967,7 +3088,8 @@ static int fill_device_from_item(struct extent_buffer *leaf,
        unsigned long ptr;
 
        device->devid = btrfs_device_id(leaf, dev_item);
-       device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+       device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+       device->total_bytes = device->disk_total_bytes;
        device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
        device->type = btrfs_device_type(leaf, dev_item);
        device->io_align = btrfs_device_io_align(leaf, dev_item);