Merge git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index dd06e18e5aac7518191e02791073345e6a0f51cd..5f01dad4b696352f211106b4c9bd0024268913b6 100644
@@ -20,6 +20,7 @@
 #include <linux/buffer_head.h>
 #include <linux/blkdev.h>
 #include <linux/random.h>
+#include <linux/iocontext.h>
 #include <asm/div64.h>
 #include "compat.h"
 #include "ctree.h"
@@ -124,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
        return NULL;
 }
 
+static void requeue_list(struct btrfs_pending_bios *pending_bios,
+                       struct bio *head, struct bio *tail)
+{
+       struct bio *old_head;
+
+       old_head = pending_bios->head;
+       pending_bios->head = head;
+       if (pending_bios->tail)
+               tail->bi_next = old_head;
+       else
+               pending_bios->tail = tail;
+}
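requeue_list() splices a partially processed batch back onto the FRONT of the
list, so congested bios are retried before anything queued later.  A minimal
userspace sketch of the same splice, using hypothetical stand-ins (struct node,
struct plist) for bio and btrfs_pending_bios:

#include <assert.h>
#include <stddef.h>

struct node { struct node *next; int id; };
struct plist { struct node *head; struct node *tail; };

/* put the drained batch [head, tail] back on the front of the list */
static void requeue_front(struct plist *l, struct node *head, struct node *tail)
{
	struct node *old_head = l->head;

	l->head = head;
	if (l->tail)
		tail->next = old_head;	/* batch now precedes what was left */
	else
		l->tail = tail;		/* list was empty; batch is the list */
}

int main(void)
{
	struct node a = { NULL, 1 }, b = { NULL, 2 }, c = { NULL, 3 };
	struct plist l = { &c, &c };	/* one entry still queued */

	a.next = &b;			/* drained batch: a -> b */
	requeue_front(&l, &a, &b);
	assert(l.head == &a && b.next == &c && l.tail == &c);
	return 0;
}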
+
 /*
  * we try to collect pending bios for a device so we don't get a large
  * number of procs sending bios down to the same device.  This greatly
@@ -140,31 +155,44 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
        struct bio *pending;
        struct backing_dev_info *bdi;
        struct btrfs_fs_info *fs_info;
+       struct btrfs_pending_bios *pending_bios;
        struct bio *tail;
        struct bio *cur;
        int again = 0;
-       unsigned long num_run = 0;
+       unsigned long num_run;
+       unsigned long num_sync_run;
        unsigned long limit;
+       unsigned long last_waited = 0;
 
-       bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
+       bdi = blk_get_backing_dev_info(device->bdev);
        fs_info = device->dev_root->fs_info;
        limit = btrfs_async_submit_limit(fs_info);
        limit = limit * 2 / 3;
 
+       /* we want to make sure that every time we switch from the sync
+        * list to the normal list, we unplug
+        */
+       num_sync_run = 0;
+
 loop:
        spin_lock(&device->io_lock);
+       num_run = 0;
 
 loop_lock:
+
        /* take all the bios off the list at once and process them
         * later on (without the lock held).  But, remember the
         * tail and other pointers so the bios can be properly reinserted
         * into the list if we hit congestion
         */
-       pending = device->pending_bios;
-       tail = device->pending_bio_tail;
+       if (device->pending_sync_bios.head)
+               pending_bios = &device->pending_sync_bios;
+       else
+               pending_bios = &device->pending_bios;
+
+       pending = pending_bios->head;
+       tail = pending_bios->tail;
        WARN_ON(pending && !tail);
-       device->pending_bios = NULL;
-       device->pending_bio_tail = NULL;
 
        /*
         * if pending was null this time around, no bios need processing
@@ -174,16 +202,41 @@ loop_lock:
         * device->running_pending is used to synchronize with the
         * schedule_bio code.
         */
-       if (pending) {
-               again = 1;
-               device->running_pending = 1;
-       } else {
+       if (device->pending_sync_bios.head == NULL &&
+           device->pending_bios.head == NULL) {
                again = 0;
                device->running_pending = 0;
+       } else {
+               again = 1;
+               device->running_pending = 1;
        }
+
+       pending_bios->head = NULL;
+       pending_bios->tail = NULL;
+
        spin_unlock(&device->io_lock);
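The drain above always prefers the sync list, and the whole batch is detached
under io_lock so it can be processed lock-free.  A hedged sketch of that
pick-and-drain step, reusing the hypothetical struct plist/struct node from
the sketch above:

/* choose the high-priority (sync) list whenever it has entries */
static struct plist *pick_pending(struct plist *sync_list, struct plist *regular)
{
	return sync_list->head ? sync_list : regular;
}

/* detach the whole batch; caller holds the producer/consumer lock */
static struct node *drain_batch(struct plist *l, struct node **tail)
{
	struct node *head = l->head;

	*tail = l->tail;
	l->head = NULL;
	l->tail = NULL;
	return head;
}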
 
+       /*
+        * if we're doing the regular priority list, make sure we unplug
+        * for any high prio bios we've sent down
+        */
+       if (pending_bios == &device->pending_bios && num_sync_run > 0) {
+               num_sync_run = 0;
+               blk_run_backing_dev(bdi, NULL);
+       }
+
        while (pending) {
+               /* catch any sync bios queued since we last held io_lock */
+               rmb();
+               if (pending_bios != &device->pending_sync_bios &&
+                   device->pending_sync_bios.head &&
+                   num_run > 16) {
+                       cond_resched();
+                       spin_lock(&device->io_lock);
+                       requeue_list(pending_bios, pending, tail);
+                       goto loop_lock;
+               }
+
                cur = pending;
                pending = pending->bi_next;
                cur->bi_next = NULL;
@@ -194,10 +247,18 @@ loop_lock:
                        wake_up(&fs_info->async_submit_wait);
 
                BUG_ON(atomic_read(&cur->bi_cnt) == 0);
-               bio_get(cur);
+               /* test bi_rw before submit_bio(); once submitted the bio
+                * may complete and be freed
+                */
+               if (bio_sync(cur))
+                       num_sync_run++;
+
                submit_bio(cur->bi_rw, cur);
-               bio_put(cur);
                num_run++;
+
+               if (need_resched()) {
+                       if (num_sync_run) {
+                               blk_run_backing_dev(bdi, NULL);
+                               num_sync_run = 0;
+                       }
+                       cond_resched();
+               }
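This need_resched() block and the one in the congestion path below follow the
same pattern: kick any sync bios already sent down to the device before the
thread yields, so a waiter is not left behind a plugged, idle queue.  A sketch
of that pattern as a helper (maybe_yield() is hypothetical; need_resched(),
cond_resched() and blk_run_backing_dev() are the real kernel calls this diff
uses):

static void maybe_yield(struct backing_dev_info *bdi,
			unsigned long *num_sync_run)
{
	if (!need_resched())
		return;
	if (*num_sync_run) {
		/* unplug: push already-submitted sync IO down to the device */
		blk_run_backing_dev(bdi, NULL);
		*num_sync_run = 0;
	}
	cond_resched();
}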
 
                /*
                 * we made progress, there is more work to do and the bdi
@@ -206,17 +267,41 @@ loop_lock:
                 */
                if (pending && bdi_write_congested(bdi) && num_run > 16 &&
                    fs_info->fs_devices->open_devices > 1) {
-                       struct bio *old_head;
+                       struct io_context *ioc;
 
-                       spin_lock(&device->io_lock);
-
-                       old_head = device->pending_bios;
-                       device->pending_bios = pending;
-                       if (device->pending_bio_tail)
-                               tail->bi_next = old_head;
-                       else
-                               device->pending_bio_tail = tail;
+                       ioc = current->io_context;
 
+                       /*
+                        * the main goal here is to avoid blocking now if
+                        * we'll be able to submit more requests without
+                        * blocking later.
+                        *
+                        * This code does two great things: it pokes into
+                        * the elevator code from a filesystem _and_
+                        * it makes assumptions about how batching works.
+                        */
+                       if (ioc && ioc->nr_batch_requests > 0 &&
+                           time_before(jiffies, ioc->last_waited + HZ/50UL) &&
+                           (last_waited == 0 ||
+                            ioc->last_waited == last_waited)) {
+                               /*
+                                * we want to go through our batch of
+                                * requests and stop.  So, we copy out
+                                * the ioc->last_waited time and test
+                                * against it before looping
+                                */
+                               last_waited = ioc->last_waited;
+                               if (need_resched()) {
+                                       if (num_sync_run) {
+                                               blk_run_backing_dev(bdi, NULL);
+                                               num_sync_run = 0;
+                                       }
+                                       cond_resched();
+                               }
+                               continue;
+                       }
+                       spin_lock(&device->io_lock);
+                       requeue_list(pending_bios, pending, tail);
                        device->running_pending = 1;
 
                        spin_unlock(&device->io_lock);
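The batching test mirrors ioc_batching() in the block layer, where a task that
recently slept on request allocation may allocate a small batch without
blocking again; the HZ/50 window matches the block layer's BLK_BATCH_TIME of
this era.  A hedged restatement of the predicate as a helper
(may_keep_submitting() is hypothetical; io_context fields are from
linux/iocontext.h):

static int may_keep_submitting(struct io_context *ioc,
			       unsigned long *last_waited)
{
	if (!ioc || ioc->nr_batch_requests <= 0)
		return 0;			/* no batch credit left */
	if (!time_before(jiffies, ioc->last_waited + HZ / 50UL))
		return 0;			/* batch window expired */
	if (*last_waited && ioc->last_waited != *last_waited)
		return 0;			/* waited again; stop after this batch */
	*last_waited = ioc->last_waited;	/* remember the window we saw */
	return 1;
}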
@@ -224,13 +309,32 @@ loop_lock:
                        goto done;
                }
        }
+
+       if (num_sync_run) {
+               num_sync_run = 0;
+               blk_run_backing_dev(bdi, NULL);
+       }
+
+       cond_resched();
        if (again)
                goto loop;
 
        spin_lock(&device->io_lock);
-       if (device->pending_bios)
+       if (device->pending_bios.head || device->pending_sync_bios.head)
                goto loop_lock;
        spin_unlock(&device->io_lock);
+
+       /*
+        * IO has already been through a long path to get here.  Checksumming,
+        * async helper threads, perhaps compression.  We've done a pretty
+        * good job of collecting a batch of IO and should just unplug
+        * the device right away.
+        *
+        * This will help anyone who is waiting on the IO; they might have
+        * already unplugged, but done so before the bio they
+        * cared about found its way down here.
+        */
+       blk_run_backing_dev(bdi, NULL);
 done:
        return 0;
 }
@@ -1439,7 +1543,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
        btrfs_set_device_io_align(leaf, dev_item, device->io_align);
        btrfs_set_device_io_width(leaf, dev_item, device->io_width);
        btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
-       btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
+       btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
        btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
        btrfs_mark_buffer_dirty(leaf);
 
@@ -1836,14 +1940,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        device->total_bytes = new_size;
        if (device->writeable)
                device->fs_devices->total_rw_bytes -= diff;
-       ret = btrfs_update_device(trans, device);
-       if (ret) {
-               unlock_chunks(root);
-               btrfs_end_transaction(trans, root);
-               goto done;
-       }
-       WARN_ON(diff > old_total);
-       btrfs_set_super_total_bytes(super_copy, old_total - diff);
        unlock_chunks(root);
        btrfs_end_transaction(trans, root);
 
@@ -1875,7 +1971,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                length = btrfs_dev_extent_length(l, dev_extent);
 
                if (key.offset + length <= new_size)
-                       goto done;
+                       break;
 
                chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
                chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -1888,6 +1984,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                        goto done;
        }
 
+       /* Shrinking succeeded; otherwise we would have hit "done" above. */
+       trans = btrfs_start_transaction(root, 1);
+       if (!trans) {
+               ret = -ENOMEM;
+               goto done;
+       }
+       lock_chunks(root);
+
+       device->disk_total_bytes = new_size;
+       /* Now btrfs_update_device() will change the on-disk size. */
+       ret = btrfs_update_device(trans, device);
+       if (ret) {
+               unlock_chunks(root);
+               btrfs_end_transaction(trans, root);
+               goto done;
+       }
+       WARN_ON(diff > old_total);
+       btrfs_set_super_total_bytes(super_copy, old_total - diff);
+       unlock_chunks(root);
+       btrfs_end_transaction(trans, root);
 done:
        btrfs_free_path(path);
        return ret;
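The net effect of the shrink changes in this diff: device->total_bytes drops
immediately, so the allocator stops placing chunks past new_size, but the size
recorded in the device item and super block only shrinks after every dev
extent beyond new_size has been relocated.  A hedged outline of that ordering
(relocate_extents_beyond() and commit_device_item() are hypothetical stand-ins
for the relocation loop and the btrfs_update_device()/
btrfs_set_super_total_bytes() calls above):

static int shrink_device_sketch(struct btrfs_device *device, u64 new_size)
{
	int ret;

	/* phase 1: cap the in-memory size; new allocations stay below it */
	device->total_bytes = new_size;

	/* phase 2: move every extent that ends past new_size */
	ret = relocate_extents_beyond(device, new_size);	/* hypothetical */
	if (ret)
		return ret;	/* on-disk size untouched, still covers all extents */

	/* phase 3: only now persist the smaller size */
	device->disk_total_bytes = new_size;
	return commit_device_item(device);			/* hypothetical */
}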
@@ -2458,7 +2574,7 @@ again:
                        max_errors = 1;
                }
        }
-       if (multi_ret && rw == WRITE &&
+       if (multi_ret && (rw & (1 << BIO_RW)) &&
            stripes_allocated < stripes_required) {
                stripes_allocated = map->num_stripes;
                free_extent_map(em);
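The old test missed writes that carry extra flags: rw == WRITE is false for a
sync write even though it is still a write, while testing the BIO_RW bit
catches every write.  A small illustration against this era's bio.h flags:

/* WRITE == (1 << BIO_RW) in this era's headers, so an equality test
 * (rw == WRITE) fails once any extra flag rides along:
 *
 *   rw = WRITE                          strict true,  flagged true
 *   rw = WRITE | (1 << BIO_RW_SYNCIO)   strict false, flagged true
 */
static int is_write_request(int rw)
{
	return (rw & (1 << BIO_RW)) != 0;
}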
@@ -2723,6 +2839,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
                                 int rw, struct bio *bio)
 {
        int should_queue = 1;
+       struct btrfs_pending_bios *pending_bios;
 
        /* don't bother with additional async steps for reads, right now */
        if (!(rw & (1 << BIO_RW))) {
@@ -2744,13 +2861,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
        bio->bi_rw |= rw;
 
        spin_lock(&device->io_lock);
+       if (bio_sync(bio))
+               pending_bios = &device->pending_sync_bios;
+       else
+               pending_bios = &device->pending_bios;
 
-       if (device->pending_bio_tail)
-               device->pending_bio_tail->bi_next = bio;
+       if (pending_bios->tail)
+               pending_bios->tail->bi_next = bio;
 
-       device->pending_bio_tail = bio;
-       if (!device->pending_bios)
-               device->pending_bios = bio;
+       pending_bios->tail = bio;
+       if (!pending_bios->head)
+               pending_bios->head = bio;
        if (device->running_pending)
                should_queue = 0;
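The producer side is the mirror image of run_scheduled_bios(): pick the sync
or regular list, append at the tail, and only kick the worker if it is not
already running.  A sketch with the hypothetical types from the earlier
sketches (queue_node() returns nonzero when the caller should queue the
worker):

static int queue_node(struct plist *sync_list, struct plist *regular,
		      struct node *n, int is_sync, int worker_running)
{
	struct plist *l = is_sync ? sync_list : regular;

	n->next = NULL;
	if (l->tail)
		l->tail->next = n;
	l->tail = n;
	if (!l->head)		/* list was empty */
		l->head = n;

	return !worker_running;
}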
 
@@ -2967,7 +3088,8 @@ static int fill_device_from_item(struct extent_buffer *leaf,
        unsigned long ptr;
 
        device->devid = btrfs_device_id(leaf, dev_item);
-       device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+       device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+       device->total_bytes = device->disk_total_bytes;
        device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
        device->type = btrfs_device_type(leaf, dev_item);
        device->io_align = btrfs_device_io_align(leaf, dev_item);