Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 10 May 2017 18:33:08 +0000 (11:33 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 10 May 2017 18:33:08 +0000 (11:33 -0700)
Pull virtio updates from Michael Tsirkin:
 "Fixes, cleanups, performance

  A bunch of changes to virtio, most affecting virtio net. Also ptr_ring
  batched zeroing - first of batching enhancements that seems ready."

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  s390/virtio: change maintainership
  tools/virtio: fix spelling mistake: "wakeus" -> "wakeups"
  virtio_net: tidy a couple debug statements
  ptr_ring: support testing different batching sizes
  ringtest: support test specific parameters
  ptr_ring: batch ring zeroing
  virtio: virtio_driver doc
  virtio_net: don't reset twice on XDP on/off
  virtio_net: fix support for small rings
  virtio_net: reduce alignment for buffers
  virtio_net: rework mergeable buffer handling
  virtio_net: allow specifying context for rx
  virtio: allow extra context per descriptor
  tools/virtio: fix build breakage
  virtio: add context flag to find vqs
  virtio: wrap find_vqs
  ringtest: fix an assert statement

32 files changed:
MAINTAINERS
drivers/block/virtio_blk.c
drivers/char/virtio_console.c
drivers/crypto/virtio/virtio_crypto_core.c
drivers/gpu/drm/virtio/virtgpu_kms.c
drivers/misc/mic/vop/vop_main.c
drivers/net/caif/caif_virtio.c
drivers/net/virtio_net.c
drivers/remoteproc/remoteproc_virtio.c
drivers/rpmsg/virtio_rpmsg_bus.c
drivers/s390/virtio/kvm_virtio.c
drivers/s390/virtio/virtio_ccw.c
drivers/scsi/virtio_scsi.c
drivers/virtio/virtio_balloon.c
drivers/virtio/virtio_input.c
drivers/virtio/virtio_mmio.c
drivers/virtio/virtio_pci_common.c
drivers/virtio/virtio_pci_common.h
drivers/virtio/virtio_pci_legacy.c
drivers/virtio/virtio_pci_modern.c
drivers/virtio/virtio_ring.c
include/linux/ptr_ring.h
include/linux/virtio.h
include/linux/virtio_config.h
include/linux/virtio_ring.h
net/vmw_vsock/virtio_transport.c
tools/virtio/linux/virtio.h
tools/virtio/ringtest/main.c
tools/virtio/ringtest/main.h
tools/virtio/ringtest/ptr_ring.c
tools/virtio/virtio_test.c
tools/virtio/vringh_test.c

index 2decf40d28e1f8131d7f4b65de4639830c492fe9..66b1e1a2d203417f62ba0839c9687c40749deb95 100644 (file)
@@ -13556,8 +13556,8 @@ F:      include/uapi/linux/virtio_*.h
 F:     drivers/crypto/virtio/
 
 VIRTIO DRIVERS FOR S390
-M:     Christian Borntraeger <borntraeger@de.ibm.com>
 M:     Cornelia Huck <cornelia.huck@de.ibm.com>
+M:     Halil Pasic <pasic@linux.vnet.ibm.com>
 L:     linux-s390@vger.kernel.org
 L:     virtualization@lists.linux-foundation.org
 L:     kvm@vger.kernel.org
index 94173de1efaab18b07924ed0c8431dbb6ba5ca34..553cc4c542b4f13a5a04d4ca48af24198401c9f8 100644 (file)
@@ -452,8 +452,7 @@ static int init_vq(struct virtio_blk *vblk)
        }
 
        /* Discover virtqueues and write information to configuration.  */
-       err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names,
-                       &desc);
+       err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
        if (err)
                goto out;
 
index 7d041d026680d337a06fcb4fabf0f4ff955612c6..ad843eb02ae7be21ae7470c6cc840a30a3f6e272 100644 (file)
@@ -1945,9 +1945,9 @@ static int init_vqs(struct ports_device *portdev)
                }
        }
        /* Find the queues. */
-       err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs,
-                                             io_callbacks,
-                                             (const char **)io_names, NULL);
+       err = virtio_find_vqs(portdev->vdev, nr_queues, vqs,
+                             io_callbacks,
+                             (const char **)io_names, NULL);
        if (err)
                goto free;
 
index 21472e427f6fe723f757a83f98c42c2f3ec78d68..a111cd72797b19e8a589ef846101d4589e90e199 100644 (file)
@@ -119,8 +119,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi)
                names[i] = vi->data_vq[i].name;
        }
 
-       ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
-                                        names, NULL);
+       ret = virtio_find_vqs(vi->vdev, total_vqs, vqs, callbacks, names, NULL);
        if (ret)
                goto err_find;
 
index 491866865c3397b4c2e73fdfc3444aac301f9ee4..1e1c90b30d4ad9b6cac056169d6f6eea7acede00 100644 (file)
@@ -175,8 +175,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
        DRM_INFO("virgl 3d acceleration not supported by guest\n");
 #endif
 
-       ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs,
-                                           callbacks, names, NULL);
+       ret = virtio_find_vqs(vgdev->vdev, 2, vqs, callbacks, names, NULL);
        if (ret) {
                DRM_ERROR("failed to find virt queues\n");
                goto err_vqs;
index c2e29d7f0de88838d18cdb52c965dda55e16f774..a341938c7e2c6738027dd4df53cdf4da0f8b6bd1 100644 (file)
@@ -278,7 +278,7 @@ static void vop_del_vqs(struct virtio_device *dev)
 static struct virtqueue *vop_find_vq(struct virtio_device *dev,
                                     unsigned index,
                                     void (*callback)(struct virtqueue *vq),
-                                    const char *name)
+                                    const char *name, bool ctx)
 {
        struct _vop_vdev *vdev = to_vopvdev(dev);
        struct vop_device *vpdev = vdev->vpdev;
@@ -314,6 +314,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
                                le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN,
                                dev,
                                false,
+                               ctx,
                                (void __force *)va, vop_notify, callback, name);
        if (!vq) {
                err = -ENOMEM;
@@ -374,7 +375,8 @@ unmap:
 static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
                        struct virtqueue *vqs[],
                        vq_callback_t *callbacks[],
-                       const char * const names[], struct irq_affinity *desc)
+                       const char * const names[], const bool *ctx,
+                       struct irq_affinity *desc)
 {
        struct _vop_vdev *vdev = to_vopvdev(dev);
        struct vop_device *vpdev = vdev->vpdev;
@@ -388,7 +390,8 @@ static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
        for (i = 0; i < nvqs; ++i) {
                dev_dbg(_vop_dev(vdev), "%s: %d: %s\n",
                        __func__, i, names[i]);
-               vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i]);
+               vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i],
+                                    ctx ? ctx[i] : false);
                if (IS_ERR(vqs[i])) {
                        err = PTR_ERR(vqs[i]);
                        goto error;
index bc0eb47ecceea7891c22e309f604ece0b0de9807..6122768c86444ec5b9c5fa67d7a63aa2690ba2f5 100644 (file)
@@ -679,8 +679,7 @@ static int cfv_probe(struct virtio_device *vdev)
                goto err;
 
        /* Get the TX virtio ring. This is a "guest side vring". */
-       err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names,
-                       NULL);
+       err = virtio_find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names, NULL);
        if (err)
                goto err;
 
index 1c6d3923c224aad638802a8de531915c027f0b1b..9320d96a1632bbebe8bd1d4a04059e0df631ac19 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/average.h>
+#include <net/route.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -54,17 +55,6 @@ module_param(napi_tx, bool, 0644);
  */
 DECLARE_EWMA(pkt_len, 0, 64)
 
-/* With mergeable buffers we align buffer address and use the low bits to
- * encode its true size. Buffer size is up to 1 page so we need to align to
- * square root of page size to ensure we reserve enough bits to encode the true
- * size.
- */
-#define MERGEABLE_BUFFER_MIN_ALIGN_SHIFT ((PAGE_SHIFT + 1) / 2)
-
-/* Minimum alignment for mergeable packet buffers. */
-#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, \
-                                  1 << MERGEABLE_BUFFER_MIN_ALIGN_SHIFT)
-
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
 struct virtnet_stats {
@@ -112,6 +102,9 @@ struct receive_queue {
        /* RX: fragments + linear part + virtio header */
        struct scatterlist sg[MAX_SKB_FRAGS + 2];
 
+       /* Min single buffer size for mergeable buffers case. */
+       unsigned int min_buf_len;
+
        /* Name of this receive queue: input.$index */
        char name[40];
 };
@@ -277,24 +270,6 @@ static void skb_xmit_done(struct virtqueue *vq)
                netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
-static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
-{
-       unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
-       return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
-}
-
-static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
-{
-       return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);
-
-}
-
-static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
-{
-       unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
-       return (unsigned long)buf | (size - 1);
-}
-
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                                   struct receive_queue *rq,
@@ -538,15 +513,13 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 
        while (--*num_buf) {
                unsigned int buflen;
-               unsigned long ctx;
                void *buf;
                int off;
 
-               ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen);
-               if (unlikely(!ctx))
+               buf = virtqueue_get_buf(rq->vq, &buflen);
+               if (unlikely(!buf))
                        goto err_buf;
 
-               buf = mergeable_ctx_to_buf_address(ctx);
                p = virt_to_head_page(buf);
                off = buf - page_address(p);
 
@@ -575,10 +548,10 @@ err_buf:
 static struct sk_buff *receive_mergeable(struct net_device *dev,
                                         struct virtnet_info *vi,
                                         struct receive_queue *rq,
-                                        unsigned long ctx,
+                                        void *buf,
+                                        void *ctx,
                                         unsigned int len)
 {
-       void *buf = mergeable_ctx_to_buf_address(ctx);
        struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
        u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
        struct page *page = virt_to_head_page(buf);
@@ -666,7 +639,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        }
        rcu_read_unlock();
 
-       truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+       if (unlikely(len > (unsigned long)ctx)) {
+               pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+                        dev->name, len, (unsigned long)ctx);
+               dev->stats.rx_length_errors++;
+               goto err_skb;
+       }
+       truesize = (unsigned long)ctx;
        head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
        curr_skb = head_skb;
 
@@ -675,7 +654,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        while (--num_buf) {
                int num_skb_frags;
 
-               ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+               buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
                if (unlikely(!ctx)) {
                        pr_debug("%s: rx error: %d buffers out of %d missing\n",
                                 dev->name, num_buf,
@@ -685,8 +664,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                        goto err_buf;
                }
 
-               buf = mergeable_ctx_to_buf_address(ctx);
                page = virt_to_head_page(buf);
+               if (unlikely(len > (unsigned long)ctx)) {
+                       pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+                                dev->name, len, (unsigned long)ctx);
+                       dev->stats.rx_length_errors++;
+                       goto err_skb;
+               }
+               truesize = (unsigned long)ctx;
 
                num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
                if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
@@ -702,7 +687,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                        head_skb->truesize += nskb->truesize;
                        num_skb_frags = 0;
                }
-               truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
                if (curr_skb != head_skb) {
                        head_skb->data_len += len;
                        head_skb->len += len;
@@ -727,14 +711,14 @@ err_xdp:
 err_skb:
        put_page(page);
        while (--num_buf) {
-               ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
-               if (unlikely(!ctx)) {
+               buf = virtqueue_get_buf(rq->vq, &len);
+               if (unlikely(!buf)) {
                        pr_debug("%s: rx error: %d buffers missing\n",
                                 dev->name, num_buf);
                        dev->stats.rx_length_errors++;
                        break;
                }
-               page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
+               page = virt_to_head_page(buf);
                put_page(page);
        }
 err_buf:
@@ -745,7 +729,7 @@ xdp_xmit:
 }
 
 static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
-                      void *buf, unsigned int len)
+                      void *buf, unsigned int len, void **ctx)
 {
        struct net_device *dev = vi->dev;
        struct sk_buff *skb;
@@ -756,9 +740,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
                pr_debug("%s: short packet %i\n", dev->name, len);
                dev->stats.rx_length_errors++;
                if (vi->mergeable_rx_bufs) {
-                       unsigned long ctx = (unsigned long)buf;
-                       void *base = mergeable_ctx_to_buf_address(ctx);
-                       put_page(virt_to_head_page(base));
+                       put_page(virt_to_head_page(buf));
                } else if (vi->big_packets) {
                        give_pages(rq, buf);
                } else {
@@ -768,7 +750,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
        }
 
        if (vi->mergeable_rx_bufs)
-               skb = receive_mergeable(dev, vi, rq, (unsigned long)buf, len);
+               skb = receive_mergeable(dev, vi, rq, buf, ctx, len);
        else if (vi->big_packets)
                skb = receive_big(dev, vi, rq, buf, len);
        else
@@ -880,14 +862,15 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
        return err;
 }
 
-static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len)
+static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
+                                         struct ewma_pkt_len *avg_pkt_len)
 {
        const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        unsigned int len;
 
        len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
-                       GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
-       return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
+                               rq->min_buf_len - hdr_len, PAGE_SIZE - hdr_len);
+       return ALIGN(len, L1_CACHE_BYTES);
 }
 
 static int add_recvbuf_mergeable(struct virtnet_info *vi,
@@ -896,17 +879,17 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
        struct page_frag *alloc_frag = &rq->alloc_frag;
        unsigned int headroom = virtnet_get_headroom(vi);
        char *buf;
-       unsigned long ctx;
+       void *ctx;
        int err;
        unsigned int len, hole;
 
-       len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
+       len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
        if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
                return -ENOMEM;
 
        buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
        buf += headroom; /* advance address leaving hole at front of pkt */
-       ctx = mergeable_buf_to_ctx(buf, len);
+       ctx = (void *)(unsigned long)len;
        get_page(alloc_frag->page);
        alloc_frag->offset += len + headroom;
        hole = alloc_frag->size - alloc_frag->offset;
@@ -921,7 +904,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
        }
 
        sg_init_one(rq->sg, buf, len);
-       err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
+       err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
        if (err < 0)
                put_page(virt_to_head_page(buf));
 
@@ -1032,10 +1015,20 @@ static int virtnet_receive(struct receive_queue *rq, int budget)
        void *buf;
        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 
-       while (received < budget &&
-              (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
-               bytes += receive_buf(vi, rq, buf, len);
-               received++;
+       if (vi->mergeable_rx_bufs) {
+               void *ctx;
+
+               while (received < budget &&
+                      (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
+                       bytes += receive_buf(vi, rq, buf, len, ctx);
+                       received++;
+               }
+       } else {
+               while (received < budget &&
+                      (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+                       bytes += receive_buf(vi, rq, buf, len, NULL);
+                       received++;
+               }
        }
 
        if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
@@ -1854,7 +1847,6 @@ static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
        virtnet_freeze_down(dev);
        _remove_vq_common(vi);
 
-       dev->config->reset(dev);
        virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
        virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
@@ -2118,9 +2110,7 @@ static void free_unused_bufs(struct virtnet_info *vi)
 
                while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
                        if (vi->mergeable_rx_bufs) {
-                               unsigned long ctx = (unsigned long)buf;
-                               void *base = mergeable_ctx_to_buf_address(ctx);
-                               put_page(virt_to_head_page(base));
+                               put_page(virt_to_head_page(buf));
                        } else if (vi->big_packets) {
                                give_pages(&vi->rq[i], buf);
                        } else {
@@ -2141,6 +2131,21 @@ static void virtnet_del_vqs(struct virtnet_info *vi)
        virtnet_free_queues(vi);
 }
 
+/* How large should a single buffer be so a queue full of these can fit at
+ * least one full packet?
+ * Logic below assumes the mergeable buffer header is used.
+ */
+static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
+{
+       const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+       unsigned int rq_size = virtqueue_get_vring_size(vq);
+       unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
+       unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
+       unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
+
+       return max(min_buf_len, hdr_len);
+}
+
 static int virtnet_find_vqs(struct virtnet_info *vi)
 {
        vq_callback_t **callbacks;
@@ -2148,6 +2153,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
        int ret = -ENOMEM;
        int i, total_vqs;
        const char **names;
+       bool *ctx;
 
        /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
         * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
@@ -2166,6 +2172,13 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
        names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
        if (!names)
                goto err_names;
+       if (vi->mergeable_rx_bufs) {
+               ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
+               if (!ctx)
+                       goto err_ctx;
+       } else {
+               ctx = NULL;
+       }
 
        /* Parameters for control virtqueue, if any */
        if (vi->has_cvq) {
@@ -2181,10 +2194,12 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
                sprintf(vi->sq[i].name, "output.%d", i);
                names[rxq2vq(i)] = vi->rq[i].name;
                names[txq2vq(i)] = vi->sq[i].name;
+               if (ctx)
+                       ctx[rxq2vq(i)] = true;
        }
 
        ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
-                                        names, NULL);
+                                        names, ctx, NULL);
        if (ret)
                goto err_find;
 
@@ -2196,6 +2211,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 
        for (i = 0; i < vi->max_queue_pairs; i++) {
                vi->rq[i].vq = vqs[rxq2vq(i)];
+               vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
                vi->sq[i].vq = vqs[txq2vq(i)];
        }
 
@@ -2206,6 +2222,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
        return 0;
 
 err_find:
+       kfree(ctx);
+err_ctx:
        kfree(names);
 err_names:
        kfree(callbacks);
@@ -2282,7 +2300,8 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 
        BUG_ON(queue_index >= vi->max_queue_pairs);
        avg = &vi->rq[queue_index].mrg_avg_pkt_len;
-       return sprintf(buf, "%u\n", get_mergeable_buf_len(avg));
+       return sprintf(buf, "%u\n",
+                      get_mergeable_buf_len(&vi->rq[queue_index], avg));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
index 0142cc3f0c91c6fe98fe4222741819692592b25b..294634836b321d01d2adbf5cd2613b488c5eb44f 100644 (file)
@@ -71,7 +71,7 @@ EXPORT_SYMBOL(rproc_vq_interrupt);
 static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
                                    unsigned int id,
                                    void (*callback)(struct virtqueue *vq),
-                                   const char *name)
+                                   const char *name, bool ctx)
 {
        struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
        struct rproc *rproc = vdev_to_rproc(vdev);
@@ -103,8 +103,8 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
         * Create the new vq, and tell virtio we're not interested in
         * the 'weak' smp barriers, since we're talking with a real device.
         */
-       vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, addr,
-                                rproc_virtio_notify, callback, name);
+       vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, ctx,
+                                addr, rproc_virtio_notify, callback, name);
        if (!vq) {
                dev_err(dev, "vring_new_virtqueue %s failed\n", name);
                rproc_free_vring(rvring);
@@ -138,12 +138,14 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
                                 struct virtqueue *vqs[],
                                 vq_callback_t *callbacks[],
                                 const char * const names[],
+                                const bool * ctx,
                                 struct irq_affinity *desc)
 {
        int i, ret;
 
        for (i = 0; i < nvqs; ++i) {
-               vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i]);
+               vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i],
+                                   ctx ? ctx[i] : false);
                if (IS_ERR(vqs[i])) {
                        ret = PTR_ERR(vqs[i]);
                        goto error;
index 5e66e081027e56ce00ac9273752a5c2bc34ea7c6..f7cade09d38a3d3476a14dfef67adba84bfad90f 100644 (file)
@@ -869,7 +869,7 @@ static int rpmsg_probe(struct virtio_device *vdev)
        init_waitqueue_head(&vrp->sendq);
 
        /* We expect two virtqueues, rx and tx (and in this order) */
-       err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names, NULL);
+       err = virtio_find_vqs(vdev, 2, vqs, vq_cbs, names, NULL);
        if (err)
                goto free_vrp;
 
index 2ce0b3eb2efebc89121b8cd938d3677e936bffa4..a99d09a11f05eed1e317de5e8f06c549ab760b8b 100644 (file)
@@ -189,7 +189,7 @@ static bool kvm_notify(struct virtqueue *vq)
 static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
                                     unsigned index,
                                     void (*callback)(struct virtqueue *vq),
-                                    const char *name)
+                                    const char *name, bool ctx)
 {
        struct kvm_device *kdev = to_kvmdev(vdev);
        struct kvm_vqconfig *config;
@@ -211,7 +211,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
                goto out;
 
        vq = vring_new_virtqueue(index, config->num, KVM_S390_VIRTIO_RING_ALIGN,
-                                vdev, true, (void *) config->address,
+                                vdev, true, ctx, (void *) config->address,
                                 kvm_notify, callback, name);
        if (!vq) {
                err = -ENOMEM;
@@ -256,6 +256,7 @@ static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                        struct virtqueue *vqs[],
                        vq_callback_t *callbacks[],
                        const char * const names[],
+                       const bool *ctx,
                        struct irq_affinity *desc)
 {
        struct kvm_device *kdev = to_kvmdev(vdev);
@@ -266,7 +267,8 @@ static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                return -ENOENT;
 
        for (i = 0; i < nvqs; ++i) {
-               vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]);
+               vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i],
+                                    ctx ? ctx[i] : false);
                if (IS_ERR(vqs[i]))
                        goto error;
        }
index 0ed209f3d8b0c5d572a712e5943b358ec19f3bb0..2a76ea78a0bf1ae9f62508e701cca07a9f690d1f 100644 (file)
@@ -484,7 +484,7 @@ static void virtio_ccw_del_vqs(struct virtio_device *vdev)
 
 static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
                                             int i, vq_callback_t *callback,
-                                            const char *name,
+                                            const char *name, bool ctx,
                                             struct ccw1 *ccw)
 {
        struct virtio_ccw_device *vcdev = to_vc_device(vdev);
@@ -522,7 +522,7 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
        }
 
        vq = vring_new_virtqueue(i, info->num, KVM_VIRTIO_CCW_RING_ALIGN, vdev,
-                                true, info->queue, virtio_ccw_kvm_notify,
+                                true, ctx, info->queue, virtio_ccw_kvm_notify,
                                 callback, name);
        if (!vq) {
                /* For now, we fail if we can't get the requested size. */
@@ -629,6 +629,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                               struct virtqueue *vqs[],
                               vq_callback_t *callbacks[],
                               const char * const names[],
+                              const bool *ctx,
                               struct irq_affinity *desc)
 {
        struct virtio_ccw_device *vcdev = to_vc_device(vdev);
@@ -642,7 +643,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 
        for (i = 0; i < nvqs; ++i) {
                vqs[i] = virtio_ccw_setup_vq(vdev, i, callbacks[i], names[i],
-                                            ccw);
+                                            ctx ? ctx[i] : false, ccw);
                if (IS_ERR(vqs[i])) {
                        ret = PTR_ERR(vqs[i]);
                        vqs[i] = NULL;
index a29d068b76962df07b67f4885e0c67d4c3342f64..f8dbfeee6c63f24d26f810d39b36405e03cdbbef 100644 (file)
@@ -894,8 +894,7 @@ static int virtscsi_init(struct virtio_device *vdev,
        }
 
        /* Discover virtqueues and write information to configuration.  */
-       err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names,
-                       &desc);
+       err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
        if (err)
                goto out;
 
index 34adf9b9c0538815db33f62ed842de49be5222e7..408c174ef0d5c076dc2f51da612b8e0f3dfa8269 100644 (file)
@@ -418,8 +418,7 @@ static int init_vqs(struct virtio_balloon *vb)
         * optionally stat.
         */
        nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
-       err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names,
-                       NULL);
+       err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
        if (err)
                return err;
 
index 79f1293cda9327a051feef083871bdbc62038496..3a0468f2ceb08a3c4cd3eac033477b8049ebd89d 100644 (file)
@@ -173,8 +173,7 @@ static int virtinput_init_vqs(struct virtio_input *vi)
        static const char * const names[] = { "events", "status" };
        int err;
 
-       err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names,
-                       NULL);
+       err = virtio_find_vqs(vi->vdev, 2, vqs, cbs, names, NULL);
        if (err)
                return err;
        vi->evt = vqs[0];
index 78343b8f9034b35ea7d18e6f8a5b3e3df4bae9e0..74dc7170fd351e02d1732357b0705f66c507f7ce 100644 (file)
@@ -351,7 +351,7 @@ static void vm_del_vqs(struct virtio_device *vdev)
 
 static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
                                  void (*callback)(struct virtqueue *vq),
-                                 const char *name)
+                                 const char *name, bool ctx)
 {
        struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
        struct virtio_mmio_vq_info *info;
@@ -388,7 +388,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 
        /* Create the vring */
        vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev,
-                                true, true, vm_notify, callback, name);
+                                true, true, ctx, vm_notify, callback, name);
        if (!vq) {
                err = -ENOMEM;
                goto error_new_virtqueue;
@@ -447,6 +447,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                       struct virtqueue *vqs[],
                       vq_callback_t *callbacks[],
                       const char * const names[],
+                      const bool *ctx,
                       struct irq_affinity *desc)
 {
        struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
@@ -459,7 +460,8 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                return err;
 
        for (i = 0; i < nvqs; ++i) {
-               vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
+               vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i],
+                                    ctx ? ctx[i] : false);
                if (IS_ERR(vqs[i])) {
                        vm_del_vqs(vdev);
                        return PTR_ERR(vqs[i]);
index 698d5d06fa039ca1a27b151a3dcf5d322784e3f0..007a4f3660862e1aa6e9a16207ae54bbbab61d58 100644 (file)
@@ -172,6 +172,7 @@ error:
 static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
                                     void (*callback)(struct virtqueue *vq),
                                     const char *name,
+                                    bool ctx,
                                     u16 msix_vec)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -183,7 +184,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
        if (!info)
                return ERR_PTR(-ENOMEM);
 
-       vq = vp_dev->setup_vq(vp_dev, info, index, callback, name,
+       vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
                              msix_vec);
        if (IS_ERR(vq))
                goto out_info;
@@ -274,6 +275,7 @@ void vp_del_vqs(struct virtio_device *vdev)
 static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
                struct virtqueue *vqs[], vq_callback_t *callbacks[],
                const char * const names[], bool per_vq_vectors,
+               const bool *ctx,
                struct irq_affinity *desc)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -315,6 +317,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
                else
                        msix_vec = VP_MSIX_VQ_VECTOR;
                vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+                                    ctx ? ctx[i] : false,
                                     msix_vec);
                if (IS_ERR(vqs[i])) {
                        err = PTR_ERR(vqs[i]);
@@ -345,7 +348,7 @@ error_find:
 
 static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
                struct virtqueue *vqs[], vq_callback_t *callbacks[],
-               const char * const names[])
+               const char * const names[], const bool *ctx)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        int i, err;
@@ -367,6 +370,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
                        continue;
                }
                vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+                                    ctx ? ctx[i] : false,
                                     VIRTIO_MSI_NO_VECTOR);
                if (IS_ERR(vqs[i])) {
                        err = PTR_ERR(vqs[i]);
@@ -383,20 +387,21 @@ out_del_vqs:
 /* the config->find_vqs() implementation */
 int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                struct virtqueue *vqs[], vq_callback_t *callbacks[],
-               const char * const names[], struct irq_affinity *desc)
+               const char * const names[], const bool *ctx,
+               struct irq_affinity *desc)
 {
        int err;
 
        /* Try MSI-X with one vector per queue. */
-       err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, desc);
+       err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc);
        if (!err)
                return 0;
        /* Fallback: MSI-X with one vector for config, one shared for queues. */
-       err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, desc);
+       err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc);
        if (!err)
                return 0;
        /* Finally fall back to regular interrupts. */
-       return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names);
+       return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
 }
 
 const char *vp_bus_name(struct virtio_device *vdev)
index e96334aec1e0d70842d1a9fc53462ab728be87c0..135ee3cf7175881a8259a192ba102978196687c7 100644 (file)
@@ -102,6 +102,7 @@ struct virtio_pci_device {
                                      unsigned idx,
                                      void (*callback)(struct virtqueue *vq),
                                      const char *name,
+                                     bool ctx,
                                      u16 msix_vec);
        void (*del_vq)(struct virtio_pci_vq_info *info);
 
@@ -131,7 +132,8 @@ void vp_del_vqs(struct virtio_device *vdev);
 /* the config->find_vqs() implementation */
 int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
                struct virtqueue *vqs[], vq_callback_t *callbacks[],
-               const char * const names[], struct irq_affinity *desc);
+               const char * const names[], const bool *ctx,
+               struct irq_affinity *desc);
 const char *vp_bus_name(struct virtio_device *vdev);
 
 /* Setup the affinity for a virtqueue:
index 4bfa48fb1324660f82ae6272d2e1ecc33522ba3e..2780886e8ba3d393ba4847366983c2ab56a7ed3e 100644 (file)
@@ -116,6 +116,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
                                  unsigned index,
                                  void (*callback)(struct virtqueue *vq),
                                  const char *name,
+                                 bool ctx,
                                  u16 msix_vec)
 {
        struct virtqueue *vq;
@@ -135,7 +136,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
        /* create the vring */
        vq = vring_create_virtqueue(index, num,
                                    VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
-                                   true, false, vp_notify, callback, name);
+                                   true, false, ctx,
+                                   vp_notify, callback, name);
        if (!vq)
                return ERR_PTR(-ENOMEM);
 
index 8978f109d2d79828e5b0c12649debc481dfacd7f..2555d80f6eec4b4a78860b46f453092051b50a24 100644 (file)
@@ -297,6 +297,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
                                  unsigned index,
                                  void (*callback)(struct virtqueue *vq),
                                  const char *name,
+                                 bool ctx,
                                  u16 msix_vec)
 {
        struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
@@ -328,7 +329,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
        /* create the vring */
        vq = vring_create_virtqueue(index, num,
                                    SMP_CACHE_BYTES, &vp_dev->vdev,
-                                   true, true, vp_notify, callback, name);
+                                   true, true, ctx,
+                                   vp_notify, callback, name);
        if (!vq)
                return ERR_PTR(-ENOMEM);
 
@@ -387,12 +389,14 @@ err_map_notify:
 }
 
 static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-               struct virtqueue *vqs[], vq_callback_t *callbacks[],
-               const char * const names[], struct irq_affinity *desc)
+                             struct virtqueue *vqs[],
+                             vq_callback_t *callbacks[],
+                             const char * const names[], const bool *ctx,
+                             struct irq_affinity *desc)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        struct virtqueue *vq;
-       int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc);
+       int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
 
        if (rc)
                return rc;
index 409aeaa49246a0edd7c6da07ca38b58c3f876109..5e1b548828e60745ba87581d2b9bcb6380b092f8 100644 (file)
@@ -263,6 +263,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
                                unsigned int out_sgs,
                                unsigned int in_sgs,
                                void *data,
+                               void *ctx,
                                gfp_t gfp)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
@@ -275,6 +276,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
        START_USE(vq);
 
        BUG_ON(data == NULL);
+       BUG_ON(ctx && vq->indirect);
 
        if (unlikely(vq->broken)) {
                END_USE(vq);
@@ -389,6 +391,8 @@ static inline int virtqueue_add(struct virtqueue *_vq,
        vq->desc_state[head].data = data;
        if (indirect)
                vq->desc_state[head].indir_desc = desc;
+       if (ctx)
+               vq->desc_state[head].indir_desc = ctx;
 
        /* Put entry in available array (but don't update avail->idx until they
         * do sync). */
@@ -461,7 +465,8 @@ int virtqueue_add_sgs(struct virtqueue *_vq,
                for (sg = sgs[i]; sg; sg = sg_next(sg))
                        total_sg++;
        }
-       return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp);
+       return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
+                            data, NULL, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
 
@@ -483,7 +488,7 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
                         void *data,
                         gfp_t gfp)
 {
-       return virtqueue_add(vq, &sg, num, 1, 0, data, gfp);
+       return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
 
@@ -505,10 +510,34 @@ int virtqueue_add_inbuf(struct virtqueue *vq,
                        void *data,
                        gfp_t gfp)
 {
-       return virtqueue_add(vq, &sg, num, 0, 1, data, gfp);
+       return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
 
+/**
+ * virtqueue_add_inbuf_ctx - expose input buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
+ * @data: the token identifying the buffer.
+ * @ctx: extra context for the token
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (e.g. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
+                       struct scatterlist *sg, unsigned int num,
+                       void *data,
+                       void *ctx,
+                       gfp_t gfp)
+{
+       return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
+
 /**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @vq: the struct virtqueue
@@ -598,7 +627,8 @@ bool virtqueue_kick(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick);
 
-static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
+static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
+                      void **ctx)
 {
        unsigned int i, j;
        __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
@@ -622,10 +652,15 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
        /* Plus final descriptor */
        vq->vq.num_free++;
 
-       /* Free the indirect table, if any, now that it's unmapped. */
-       if (vq->desc_state[head].indir_desc) {
+       if (vq->indirect) {
                struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
-               u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
+               u32 len;
+
+               /* Free the indirect table, if any, now that it's unmapped. */
+               if (!indir_desc)
+                       return;
+
+               len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
 
                BUG_ON(!(vq->vring.desc[head].flags &
                         cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
@@ -634,8 +669,10 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
                for (j = 0; j < len / sizeof(struct vring_desc); j++)
                        vring_unmap_one(vq, &indir_desc[j]);
 
-               kfree(vq->desc_state[head].indir_desc);
+               kfree(indir_desc);
                vq->desc_state[head].indir_desc = NULL;
+       } else if (ctx) {
+               *ctx = vq->desc_state[head].indir_desc;
        }
 }
 
@@ -660,7 +697,8 @@ static inline bool more_used(const struct vring_virtqueue *vq)
  * Returns NULL if there are no used buffers, or the "data" token
  * handed to virtqueue_add_*().
  */
-void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
+void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
+                           void **ctx)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
        void *ret;
@@ -698,7 +736,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 
        /* detach_buf clears data, so grab it now. */
        ret = vq->desc_state[i].data;
-       detach_buf(vq, i);
+       detach_buf(vq, i, ctx);
        vq->last_used_idx++;
        /* If we expect an interrupt for the next entry, tell host
         * by writing event index and flush out the write before
@@ -715,8 +753,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
        END_USE(vq);
        return ret;
 }
-EXPORT_SYMBOL_GPL(virtqueue_get_buf);
+EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
 
+void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
+{
+       return virtqueue_get_buf_ctx(_vq, len, NULL);
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_buf);
 /**
  * virtqueue_disable_cb - disable callbacks
  * @vq: the struct virtqueue we're talking about.
@@ -878,7 +921,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
                        continue;
                /* detach_buf clears data, so grab it now. */
                buf = vq->desc_state[i].data;
-               detach_buf(vq, i);
+               detach_buf(vq, i, NULL);
                vq->avail_idx_shadow--;
                vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
                END_USE(vq);
@@ -916,6 +959,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
                                        struct vring vring,
                                        struct virtio_device *vdev,
                                        bool weak_barriers,
+                                       bool context,
                                        bool (*notify)(struct virtqueue *),
                                        void (*callback)(struct virtqueue *),
                                        const char *name)
@@ -950,7 +994,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
        vq->last_add_time_valid = false;
 #endif
 
-       vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
+       vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
+               !context;
        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
        /* No callback?  Tell other side not to bother us. */
@@ -1019,6 +1064,7 @@ struct virtqueue *vring_create_virtqueue(
        struct virtio_device *vdev,
        bool weak_barriers,
        bool may_reduce_num,
+       bool context,
        bool (*notify)(struct virtqueue *),
        void (*callback)(struct virtqueue *),
        const char *name)
@@ -1058,7 +1104,7 @@ struct virtqueue *vring_create_virtqueue(
        queue_size_in_bytes = vring_size(num, vring_align);
        vring_init(&vring, num, queue, vring_align);
 
-       vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers,
+       vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
                                   notify, callback, name);
        if (!vq) {
                vring_free_queue(vdev, queue_size_in_bytes, queue,
@@ -1079,6 +1125,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      unsigned int vring_align,
                                      struct virtio_device *vdev,
                                      bool weak_barriers,
+                                     bool context,
                                      void *pages,
                                      bool (*notify)(struct virtqueue *vq),
                                      void (*callback)(struct virtqueue *vq),
@@ -1086,7 +1133,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 {
        struct vring vring;
        vring_init(&vring, num, pages, vring_align);
-       return __vring_new_virtqueue(index, vring, vdev, weak_barriers,
+       return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
                                     notify, callback, name);
 }
 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
index 6c70444da3b9d3e5c31f04193b96986ff0e4b04b..6b2e0dd88569b13c66ef445609b2f12419fdd3ac 100644 (file)
 struct ptr_ring {
        int producer ____cacheline_aligned_in_smp;
        spinlock_t producer_lock;
-       int consumer ____cacheline_aligned_in_smp;
+       int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
+       int consumer_tail; /* next entry to invalidate */
        spinlock_t consumer_lock;
        /* Shared consumer/producer data */
        /* Read-only by both the producer and the consumer */
        int size ____cacheline_aligned_in_smp; /* max entries in queue */
+       int batch; /* number of entries to consume in a batch */
        void **queue;
 };
 
@@ -170,7 +172,7 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
 static inline void *__ptr_ring_peek(struct ptr_ring *r)
 {
        if (likely(r->size))
-               return r->queue[r->consumer];
+               return r->queue[r->consumer_head];
        return NULL;
 }
 
@@ -231,9 +233,38 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
 /* Must only be called after __ptr_ring_peek returned !NULL */
 static inline void __ptr_ring_discard_one(struct ptr_ring *r)
 {
-       r->queue[r->consumer++] = NULL;
-       if (unlikely(r->consumer >= r->size))
-               r->consumer = 0;
+       /* Fundamentally, what we want to do is update consumer
+        * index and zero out the entry so producer can reuse it.
+        * Doing it naively at each consume would be as simple as:
+        *       r->queue[r->consumer++] = NULL;
+        *       if (unlikely(r->consumer >= r->size))
+        *               r->consumer = 0;
+        * but that is suboptimal when the ring is full as producer is writing
+        * out new entries in the same cache line.  Defer these updates until a
+        * batch of entries has been consumed.
+        */
+       int head = r->consumer_head++;
+
+       /* Once we have processed enough entries, invalidate them in
+        * the ring all at once so the producer can reuse their space.
+        * We also do this when we reach the end of the ring - not mandatory,
+        * but it helps keep the implementation simple.
+        */
+       if (unlikely(r->consumer_head - r->consumer_tail >= r->batch ||
+                    r->consumer_head >= r->size)) {
+               /* Zero out entries in reverse order: this way the cache line
+                * that the producer might currently be reading is touched last;
+                * the producer won't make progress and touch other cache lines
+                * besides the first one until we write out all entries.
+                */
+               while (likely(head >= r->consumer_tail))
+                       r->queue[head--] = NULL;
+               r->consumer_tail = r->consumer_head;
+       }
+       if (unlikely(r->consumer_head >= r->size)) {
+               r->consumer_head = 0;
+               r->consumer_tail = 0;
+       }
 }
 
 static inline void *__ptr_ring_consume(struct ptr_ring *r)
@@ -345,14 +376,27 @@ static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp)
        return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp);
 }
 
+static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
+{
+       r->size = size;
+       r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
+       /* We need to set batch to at least 1 to make the logic
+        * in __ptr_ring_discard_one work correctly.
+        * Batching too much (because the ring is small) would cause a lot of
+        * burstiness. Needs tuning; for now, disable batching in that case.
+        */
+       if (r->batch > r->size / 2 || !r->batch)
+               r->batch = 1;
+}
+
 static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
 {
        r->queue = __ptr_ring_init_queue_alloc(size, gfp);
        if (!r->queue)
                return -ENOMEM;
 
-       r->size = size;
-       r->producer = r->consumer = 0;
+       __ptr_ring_set_size(r, size);
+       r->producer = r->consumer_head = r->consumer_tail = 0;
        spin_lock_init(&r->producer_lock);
        spin_lock_init(&r->consumer_lock);
 
@@ -373,9 +417,10 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
                else if (destroy)
                        destroy(ptr);
 
-       r->size = size;
+       __ptr_ring_set_size(r, size);
        r->producer = producer;
-       r->consumer = 0;
+       r->consumer_head = 0;
+       r->consumer_tail = 0;
        old = r->queue;
        r->queue = queue;
 
index 7edfbdb55a995d436bf9e999ce202d0ca0bf2550..28b0e965360ff1822a22252b1ecaaab52ca8ab2d 100644 (file)
@@ -44,6 +44,12 @@ int virtqueue_add_inbuf(struct virtqueue *vq,
                        void *data,
                        gfp_t gfp);
 
+int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
+                           struct scatterlist sg[], unsigned int num,
+                           void *data,
+                           void *ctx,
+                           gfp_t gfp);
+
 int virtqueue_add_sgs(struct virtqueue *vq,
                      struct scatterlist *sgs[],
                      unsigned int out_sgs,
@@ -59,6 +65,9 @@ bool virtqueue_notify(struct virtqueue *vq);
 
 void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
 
+void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
+                           void **ctx);
+
 void virtqueue_disable_cb(struct virtqueue *vq);
 
 bool virtqueue_enable_cb(struct virtqueue *vq);
@@ -156,9 +165,13 @@ int virtio_device_restore(struct virtio_device *dev);
  * @feature_table_legacy: same as feature_table but when working in legacy mode.
  * @feature_table_size_legacy: number of entries in feature table legacy array.
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @scan: optional function to call after successful probe; intended
+ *    for virtio-scsi to invoke a scan.
  * @remove: the function to call when a device is removed.
  * @config_changed: optional function to call when the device configuration
  *    changes; may be called in interrupt context.
+ * @freeze: optional function to call during suspend/hibernation.
+ * @restore: optional function to call on resume.
  */
 struct virtio_driver {
        struct device_driver driver;
index 8355bab175e1d8fb27ac9e0860465ba8afc36076..0133d8a12ccd468514a72fdf8eff64aa7d69551d 100644 (file)
@@ -72,7 +72,8 @@ struct virtio_config_ops {
        void (*reset)(struct virtio_device *vdev);
        int (*find_vqs)(struct virtio_device *, unsigned nvqs,
                        struct virtqueue *vqs[], vq_callback_t *callbacks[],
-                       const char * const names[], struct irq_affinity *desc);
+                       const char * const names[], const bool *ctx,
+                       struct irq_affinity *desc);
        void (*del_vqs)(struct virtio_device *);
        u64 (*get_features)(struct virtio_device *vdev);
        int (*finalize_features)(struct virtio_device *vdev);
@@ -173,12 +174,32 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
        vq_callback_t *callbacks[] = { c };
        const char *names[] = { n };
        struct virtqueue *vq;
-       int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL);
+       int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL,
+                                        NULL);
        if (err < 0)
                return ERR_PTR(err);
        return vq;
 }
 
+static inline
+int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+                       struct virtqueue *vqs[], vq_callback_t *callbacks[],
+                       const char * const names[],
+                       struct irq_affinity *desc)
+{
+       return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc);
+}
+
+static inline
+int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
+                       struct virtqueue *vqs[], vq_callback_t *callbacks[],
+                       const char * const names[], const bool *ctx,
+                       struct irq_affinity *desc)
+{
+       return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx,
+                                     desc);
+}
+
 /**
  * virtio_device_ready - enable vq use in probe function
  * @vdev: the device
index e8d36938f09a56cfe4729c17b0ee8175b4991bbe..270cfa81830ee4a69bca0c4ccf914a7a94e5b3e1 100644 (file)
@@ -71,6 +71,7 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
                                         struct virtio_device *vdev,
                                         bool weak_barriers,
                                         bool may_reduce_num,
+                                        bool ctx,
                                         bool (*notify)(struct virtqueue *vq),
                                         void (*callback)(struct virtqueue *vq),
                                         const char *name);
@@ -80,6 +81,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
                                        struct vring vring,
                                        struct virtio_device *vdev,
                                        bool weak_barriers,
+                                       bool ctx,
                                        bool (*notify)(struct virtqueue *),
                                        void (*callback)(struct virtqueue *),
                                        const char *name);
@@ -93,6 +95,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      unsigned int vring_align,
                                      struct virtio_device *vdev,
                                      bool weak_barriers,
+                                     bool ctx,
                                      void *pages,
                                      bool (*notify)(struct virtqueue *vq),
                                      void (*callback)(struct virtqueue *vq),
index 9dffe0282ad4f08efe8df7ab48695183b5117739..403d86e80162e7796fd75249b1ae876d1eee1e6a 100644 (file)
@@ -576,9 +576,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
 
        vsock->vdev = vdev;
 
-       ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
-                                           vsock->vqs, callbacks, names,
-                                           NULL);
+       ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX,
+                             vsock->vqs, callbacks, names,
+                             NULL);
        if (ret < 0)
                goto out;
 
index 9377c8b4ac167723de43088e96e5b5f0effcf4b2..d8f534025b7f7bd3cb27d663f78de9b232fa85d3 100644 (file)
@@ -57,6 +57,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      unsigned int vring_align,
                                      struct virtio_device *vdev,
                                      bool weak_barriers,
+                                     bool ctx,
                                      void *pages,
                                      bool (*notify)(struct virtqueue *vq),
                                      void (*callback)(struct virtqueue *vq),
index f31353fac5415d8b9f5614e6f46f71a8f062f09b..453ca3c211933d12a5b7933852043f7154434fe6 100644 (file)
@@ -20,6 +20,7 @@
 int runcycles = 10000000;
 int max_outstanding = INT_MAX;
 int batch = 1;
+int param = 0;
 
 bool do_sleep = false;
 bool do_relax = false;
@@ -86,7 +87,7 @@ void set_affinity(const char *arg)
        cpu = strtol(arg, &endptr, 0);
        assert(!*endptr);
 
-       assert(cpu >= 0 || cpu < CPU_SETSIZE);
+       assert(cpu >= 0 && cpu < CPU_SETSIZE);
 
        self = pthread_self();
        CPU_ZERO(&cpuset);
@@ -246,6 +247,11 @@ static const struct option longopts[] = {
                .has_arg = required_argument,
                .val = 'b',
        },
+       {
+               .name = "param",
+               .has_arg = required_argument,
+               .val = 'p',
+       },
        {
                .name = "sleep",
                .has_arg = no_argument,
@@ -274,6 +280,7 @@ static void help(void)
                " [--run-cycles C (default: %d)]"
                " [--batch b]"
                " [--outstanding o]"
+               " [--param p]"
                " [--sleep]"
                " [--relax]"
                " [--exit]"
@@ -328,6 +335,12 @@ int main(int argc, char **argv)
                        assert(c > 0 && c < INT_MAX);
                        max_outstanding = c;
                        break;
+               case 'p':
+                       c = strtol(optarg, &endptr, 0);
+                       assert(!*endptr);
+                       assert(c > 0 && c < INT_MAX);
+                       param = c;
+                       break;
                case 'b':
                        c = strtol(optarg, &endptr, 0);
                        assert(!*endptr);
index 14142faf040b7e81a1c38a983aa76d9ae50ee4e1..90b0133004e17ddbc48b90a9eab2e6a489da13b0 100644 (file)
@@ -10,6 +10,8 @@
 
 #include <stdbool.h>
 
+extern int param;
+
 extern bool do_exit;
 
 #if defined(__x86_64__) || defined(__i386__)
index 635b07b4fdd3949c7883a2775575c0ff4d8ce228..7b22f1b20652082b606e4ee55dae31b56f670ef3 100644 (file)
@@ -97,6 +97,9 @@ void alloc_ring(void)
 {
        int ret = ptr_ring_init(&array, ring_size, 0);
        assert(!ret);
+       /* Hacky way to poke at ring internals. Useful for testing though. */
+       if (param)
+               array.batch = param;
 }
 
 /* guest side */
index e0445898f08fa981d372d4de6bad4eebe265cb15..0fecaec90d0d69cf622555104cf9b5f66001b68d 100644 (file)
@@ -100,7 +100,7 @@ static void vq_info_add(struct vdev_info *dev, int num)
        vring_init(&info->vring, num, info->ring, 4096);
        info->vq = vring_new_virtqueue(info->idx,
                                       info->vring.num, 4096, &dev->vdev,
-                                      true, info->ring,
+                                      true, false, info->ring,
                                       vq_notify, vq_callback, "test");
        assert(info->vq);
        info->vq->priv = info;
@@ -202,7 +202,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq,
        test = 0;
        r = ioctl(dev->control, VHOST_TEST_RUN, &test);
        assert(r >= 0);
-       fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious);
+       fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious);
 }
 
 const char optstring[] = "h";
index 5f94f51056781e1f8a662be8db245cfd0285b5a8..9476c616d0642c5e2f0162d4fabd3945a98d4f32 100644 (file)
@@ -314,7 +314,8 @@ static int parallel_test(u64 features,
                        err(1, "Could not set affinity to cpu %u", first_cpu);
 
                vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true,
-                                        guest_map, fast_vringh ? no_notify_host
+                                        false, guest_map,
+                                        fast_vringh ? no_notify_host
                                         : parallel_notify_host,
                                         never_callback_guest, "guest vq");
 
@@ -479,7 +480,7 @@ int main(int argc, char *argv[])
        memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));
 
        /* Set up guest side. */
-       vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
+       vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, false,
                                 __user_addr_min,
                                 never_notify_host, never_callback_guest,
                                 "guest vq");
@@ -663,7 +664,7 @@ int main(int argc, char *argv[])
                /* Force creation of direct, which we modify. */
                __virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
                vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
-                                        __user_addr_min,
+                                        false, __user_addr_min,
                                         never_notify_host,
                                         never_callback_guest,
                                         "guest vq");