Merge branch 'stable/for-jens-4.19' of git://git.kernel.org/pub/scm/linux/kernel...
author Jens Axboe <axboe@kernel.dk>
Mon, 27 Aug 2018 17:27:32 +0000 (11:27 -0600)
committer Jens Axboe <axboe@kernel.dk>
Mon, 27 Aug 2018 17:27:32 +0000 (11:27 -0600)
Pull Xen block driver fixes from Konrad:

"Fix for flushing out persistent pages at a deterministic rate"

* 'stable/for-jens-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/blkback: remove unused pers_gnts_lock from struct xen_blkif_ring
  xen/blkback: move persistent grants flags to bool
  xen/blkfront: reorder tests in xlblk_init()
  xen/blkfront: cleanup stale persistent grants
  xen/blkback: don't keep persistent grants too long

Documentation/ABI/testing/sysfs-driver-xen-blkback
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkfront.c

index 8bb43b66eb55a1f8c0cbac1917301b2e96a55bbc..4e7babb3ba1fecc673018253d4adbb860e0cb8a4 100644 (file)
@@ -15,3 +15,13 @@ Description:
                 blkback. If the frontend tries to use more than
                 max_persistent_grants, the LRU kicks in and starts
                 removing 5% of max_persistent_grants every 100ms.
+
+What:           /sys/module/xen_blkback/parameters/persistent_grant_unused_seconds
+Date:           August 2018
+KernelVersion:  4.19
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                How long a persistent grant is allowed to remain
+                allocated without being in use. The time is in
+                seconds, 0 means indefinitely long.
+                The default is 60 seconds.
index b55b245e805205a07266c9d745b96e4716ed3009..fd1e19f1a49f5803ca6dd5d939ec41ec592af077 100644 (file)
@@ -83,6 +83,18 @@ module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
 MODULE_PARM_DESC(max_persistent_grants,
                  "Maximum number of grants to map persistently");
 
+/*
+ * How long a persistent grant is allowed to remain allocated without being in
+ * use. The time is in seconds, 0 means indefinitely long.
+ */
+
+static unsigned int xen_blkif_pgrant_timeout = 60;
+module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout,
+                  uint, 0644);
+MODULE_PARM_DESC(persistent_grant_unused_seconds,
+                "Time in seconds an unused persistent grant is allowed to "
+                "remain allocated. Default is 60, 0 means unlimited.");
+
 /*
  * Maximum number of rings/queues blkback supports, allow as many queues as there
  * are CPUs if user has not specified a value.
@@ -123,6 +135,13 @@ module_param(log_stats, int, 0644);
 /* Number of free pages to remove on each call to gnttab_free_pages */
 #define NUM_BATCH_FREE_PAGES 10
 
+static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt)
+{
+       return xen_blkif_pgrant_timeout &&
+              (jiffies - persistent_gnt->last_used >=
+               HZ * xen_blkif_pgrant_timeout);
+}
+
 static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
 {
        unsigned long flags;
@@ -236,8 +255,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring,
                }
        }
 
-       bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
-       set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+       persistent_gnt->active = true;
        /* Add new node and rebalance tree. */
        rb_link_node(&(persistent_gnt->node), parent, new);
        rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
@@ -261,11 +279,11 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
                else if (gref > data->gnt)
                        node = node->rb_right;
                else {
-                       if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
+                       if (data->active) {
                                pr_alert_ratelimited("requesting a grant already in use\n");
                                return NULL;
                        }
-                       set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
+                       data->active = true;
                        atomic_inc(&ring->persistent_gnt_in_use);
                        return data;
                }
@@ -276,10 +294,10 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
 static void put_persistent_gnt(struct xen_blkif_ring *ring,
                                struct persistent_gnt *persistent_gnt)
 {
-       if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+       if (!persistent_gnt->active)
                pr_alert_ratelimited("freeing a grant already unused\n");
-       set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
-       clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+       persistent_gnt->last_used = jiffies;
+       persistent_gnt->active = false;
        atomic_dec(&ring->persistent_gnt_in_use);
 }
 
@@ -371,26 +389,26 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
        struct persistent_gnt *persistent_gnt;
        struct rb_node *n;
        unsigned int num_clean, total;
-       bool scan_used = false, clean_used = false;
+       bool scan_used = false;
        struct rb_root *root;
 
-       if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
-           (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
-           !ring->blkif->vbd.overflow_max_grants)) {
-               goto out;
-       }
-
        if (work_busy(&ring->persistent_purge_work)) {
                pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
                goto out;
        }
 
-       num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
-       num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
-       num_clean = min(ring->persistent_gnt_c, num_clean);
-       if ((num_clean == 0) ||
-           (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
-               goto out;
+       if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
+           (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+           !ring->blkif->vbd.overflow_max_grants)) {
+               num_clean = 0;
+       } else {
+               num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
+               num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants +
+                           num_clean;
+               num_clean = min(ring->persistent_gnt_c, num_clean);
+               pr_debug("Going to purge at least %u persistent grants\n",
+                        num_clean);
+       }
 
        /*
         * At this point, we can assure that there will be no calls
@@ -401,9 +419,7 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
          * number of grants.
         */
 
-       total = num_clean;
-
-       pr_debug("Going to purge %u persistent grants\n", num_clean);
+       total = 0;
 
        BUG_ON(!list_empty(&ring->persistent_purge_list));
        root = &ring->persistent_gnts;
@@ -412,46 +428,37 @@ purge_list:
                BUG_ON(persistent_gnt->handle ==
                        BLKBACK_INVALID_HANDLE);
 
-               if (clean_used) {
-                       clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+               if (persistent_gnt->active)
                        continue;
-               }
-
-               if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+               if (!scan_used && !persistent_gnt_timeout(persistent_gnt))
                        continue;
-               if (!scan_used &&
-                   (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
+               if (scan_used && total >= num_clean)
                        continue;
 
                rb_erase(&persistent_gnt->node, root);
                list_add(&persistent_gnt->remove_node,
                         &ring->persistent_purge_list);
-               if (--num_clean == 0)
-                       goto finished;
+               total++;
        }
        /*
-        * If we get here it means we also need to start cleaning
+        * Check whether we also need to start cleaning
         * grants that were used since last purge in order to cope
         * with the requested num
         */
-       if (!scan_used && !clean_used) {
-               pr_debug("Still missing %u purged frames\n", num_clean);
+       if (!scan_used && total < num_clean) {
+               pr_debug("Still missing %u purged frames\n", num_clean - total);
                scan_used = true;
                goto purge_list;
        }
-finished:
-       if (!clean_used) {
-               pr_debug("Finished scanning for grants to clean, removing used flag\n");
-               clean_used = true;
-               goto purge_list;
-       }
 
-       ring->persistent_gnt_c -= (total - num_clean);
-       ring->blkif->vbd.overflow_max_grants = 0;
+       if (total) {
+               ring->persistent_gnt_c -= total;
+               ring->blkif->vbd.overflow_max_grants = 0;
 
-       /* We can defer this work */
-       schedule_work(&ring->persistent_purge_work);
-       pr_debug("Purged %u/%u\n", (total - num_clean), total);
+               /* We can defer this work */
+               schedule_work(&ring->persistent_purge_work);
+               pr_debug("Purged %u/%u\n", num_clean, total);
+       }
 
 out:
        return;
index ecb35fe8ca8dbb54f36a85513a09064819acd67a..1d3002d773f7adb151dcf79adf18f919e461290a 100644 (file)
@@ -233,16 +233,6 @@ struct xen_vbd {
 
 struct backend_info;
 
-/* Number of available flags */
-#define PERSISTENT_GNT_FLAGS_SIZE      2
-/* This persistent grant is currently in use */
-#define PERSISTENT_GNT_ACTIVE          0
-/*
- * This persistent grant has been used, this flag is set when we remove the
- * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently.
- */
-#define PERSISTENT_GNT_WAS_ACTIVE      1
-
 /* Number of requests that we can fit in a ring */
 #define XEN_BLKIF_REQS_PER_PAGE                32
 
@@ -250,7 +240,8 @@ struct persistent_gnt {
        struct page *page;
        grant_ref_t gnt;
        grant_handle_t handle;
-       DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
+       unsigned long last_used;
+       bool active;
        struct rb_node node;
        struct list_head remove_node;
 };
@@ -278,7 +269,6 @@ struct xen_blkif_ring {
        wait_queue_head_t       pending_free_wq;
 
        /* Tree to store persistent grants. */
-       spinlock_t              pers_gnts_lock;
        struct rb_root          persistent_gnts;
        unsigned int            persistent_gnt_c;
        atomic_t                persistent_gnt_in_use;
index 8986adab9bf58540bd8d2ab0744d317fba6c23a0..a71d817e900ddc07ff45d240f0ae290ff408b6cd 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/scatterlist.h>
 #include <linux/bitmap.h>
 #include <linux/list.h>
+#include <linux/workqueue.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
@@ -121,6 +122,8 @@ static inline struct blkif_req *blkif_req(struct request *rq)
 
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;
+static struct delayed_work blkfront_work;
+static LIST_HEAD(info_list);
 
 /*
  * Maximum number of segments in indirect requests, the actual value used by
@@ -216,6 +219,7 @@ struct blkfront_info
        /* Save uncomplete reqs and bios for migration. */
        struct list_head requests;
        struct bio_list bio_list;
+       struct list_head info_list;
 };
 
 static unsigned int nr_minors;
@@ -1759,6 +1763,12 @@ abort_transaction:
        return err;
 }
 
+static void free_info(struct blkfront_info *info)
+{
+       list_del(&info->info_list);
+       kfree(info);
+}
+
 /* Common code used when first setting up, and when resuming. */
 static int talk_to_blkback(struct xenbus_device *dev,
                           struct blkfront_info *info)
@@ -1880,7 +1890,10 @@ again:
  destroy_blkring:
        blkif_free(info, 0);
 
-       kfree(info);
+       mutex_lock(&blkfront_mutex);
+       free_info(info);
+       mutex_unlock(&blkfront_mutex);
+
        dev_set_drvdata(&dev->dev, NULL);
 
        return err;
@@ -1991,6 +2004,10 @@ static int blkfront_probe(struct xenbus_device *dev,
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev_set_drvdata(&dev->dev, info);
 
+       mutex_lock(&blkfront_mutex);
+       list_add(&info->info_list, &info_list);
+       mutex_unlock(&blkfront_mutex);
+
        return 0;
 }
 
@@ -2301,6 +2318,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
        if (indirect_segments <= BLKIF_MAX_SEGMENTS_PER_REQUEST)
                indirect_segments = 0;
        info->max_indirect_segments = indirect_segments;
+
+       if (info->feature_persistent) {
+               mutex_lock(&blkfront_mutex);
+               schedule_delayed_work(&blkfront_work, HZ * 10);
+               mutex_unlock(&blkfront_mutex);
+       }
 }
 
 /*
@@ -2482,7 +2505,9 @@ static int blkfront_remove(struct xenbus_device *xbdev)
        mutex_unlock(&info->mutex);
 
        if (!bdev) {
-               kfree(info);
+               mutex_lock(&blkfront_mutex);
+               free_info(info);
+               mutex_unlock(&blkfront_mutex);
                return 0;
        }
 
@@ -2502,7 +2527,9 @@ static int blkfront_remove(struct xenbus_device *xbdev)
        if (info && !bdev->bd_openers) {
                xlvbd_release_gendisk(info);
                disk->private_data = NULL;
-               kfree(info);
+               mutex_lock(&blkfront_mutex);
+               free_info(info);
+               mutex_unlock(&blkfront_mutex);
        }
 
        mutex_unlock(&bdev->bd_mutex);
@@ -2585,7 +2612,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode)
                dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
                xlvbd_release_gendisk(info);
                disk->private_data = NULL;
-               kfree(info);
+               free_info(info);
        }
 
 out:
@@ -2618,6 +2645,61 @@ static struct xenbus_driver blkfront_driver = {
        .is_ready = blkfront_is_ready,
 };
 
+static void purge_persistent_grants(struct blkfront_info *info)
+{
+       unsigned int i;
+       unsigned long flags;
+
+       for (i = 0; i < info->nr_rings; i++) {
+               struct blkfront_ring_info *rinfo = &info->rinfo[i];
+               struct grant *gnt_list_entry, *tmp;
+
+               spin_lock_irqsave(&rinfo->ring_lock, flags);
+
+               if (rinfo->persistent_gnts_c == 0) {
+                       spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+                       continue;
+               }
+
+               list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
+                                        node) {
+                       if (gnt_list_entry->gref == GRANT_INVALID_REF ||
+                           gnttab_query_foreign_access(gnt_list_entry->gref))
+                               continue;
+
+                       list_del(&gnt_list_entry->node);
+                       gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
+                       rinfo->persistent_gnts_c--;
+                       __free_page(gnt_list_entry->page);
+                       kfree(gnt_list_entry);
+               }
+
+               spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+       }
+}
+
+static void blkfront_delay_work(struct work_struct *work)
+{
+       struct blkfront_info *info;
+       bool need_schedule_work = false;
+
+       mutex_lock(&blkfront_mutex);
+
+       list_for_each_entry(info, &info_list, info_list) {
+               if (info->feature_persistent) {
+                       need_schedule_work = true;
+                       mutex_lock(&info->mutex);
+                       purge_persistent_grants(info);
+                       mutex_unlock(&info->mutex);
+               }
+       }
+
+       if (need_schedule_work)
+               schedule_delayed_work(&blkfront_work, HZ * 10);
+
+       mutex_unlock(&blkfront_mutex);
+}
+
 static int __init xlblk_init(void)
 {
        int ret;
@@ -2626,6 +2708,15 @@ static int __init xlblk_init(void)
        if (!xen_domain())
                return -ENODEV;
 
+       if (!xen_has_pv_disk_devices())
+               return -ENODEV;
+
+       if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
+               pr_warn("xen_blk: can't get major %d with name %s\n",
+                       XENVBD_MAJOR, DEV_NAME);
+               return -ENODEV;
+       }
+
        if (xen_blkif_max_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
                xen_blkif_max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
 
@@ -2641,14 +2732,7 @@ static int __init xlblk_init(void)
                xen_blkif_max_queues = nr_cpus;
        }
 
-       if (!xen_has_pv_disk_devices())
-               return -ENODEV;
-
-       if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
-               printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
-                      XENVBD_MAJOR, DEV_NAME);
-               return -ENODEV;
-       }
+       INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work);
 
        ret = xenbus_register_frontend(&blkfront_driver);
        if (ret) {
@@ -2663,6 +2747,8 @@ module_init(xlblk_init);
 
 static void __exit xlblk_exit(void)
 {
+       cancel_delayed_work_sync(&blkfront_work);
+
        xenbus_unregister_driver(&blkfront_driver);
        unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
        kfree(minors);