virtio-net: do not reset during XDP set
[sfrench/cifs-2.6.git] / drivers / net / virtio_net.c
index 99a26a9efec1ab695bae833a1f9dca437e9dde5c..f894713dca2a973900bd365e8292c21ed4fcb18d 100644 (file)
@@ -270,6 +270,23 @@ static void skb_xmit_done(struct virtqueue *vq)
                netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
+#define MRG_CTX_HEADER_SHIFT 22
+static void *mergeable_len_to_ctx(unsigned int truesize,
+                                 unsigned int headroom)
+{
+       return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
+}
+
+static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
+{
+       return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
+}
+
+static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
+{
+       return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
+}
+
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                                   struct receive_queue *rq,
@@ -390,19 +407,85 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
        return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
 }
 
+/* We copy the packet for XDP in the following cases:
+ *
+ * 1) Packet is scattered across multiple rx buffers.
+ * 2) Headroom space is insufficient.
+ *
+ * This is inefficient but it's a temporary condition that
+ * we hit right after XDP is enabled and until queue is refilled
+ * with large buffers with sufficient headroom - so it should affect
+ * at most queue size packets.
+ * Afterwards, the conditions to enable XDP should preclude the
+ * underlying device from sending packets across multiple buffers
+ * (num_buf > 1), and we make sure buffers have enough headroom.
+ */
+ */
+static struct page *xdp_linearize_page(struct receive_queue *rq,
+                                      u16 *num_buf,
+                                      struct page *p,
+                                      int offset,
+                                      int page_off,
+                                      unsigned int *len)
+{
+       struct page *page = alloc_page(GFP_ATOMIC);
+
+       if (!page)
+               return NULL;
+
+       memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+       page_off += *len;
+
+       while (--*num_buf) {
+               unsigned int buflen;
+               void *buf;
+               int off;
+
+               buf = virtqueue_get_buf(rq->vq, &buflen);
+               if (unlikely(!buf))
+                       goto err_buf;
+
+               p = virt_to_head_page(buf);
+               off = buf - page_address(p);
+
+               /* guard against a misconfigured or uncooperative backend that
+                * is sending packet larger than the MTU.
+                */
+               if ((page_off + buflen) > PAGE_SIZE) {
+                       put_page(p);
+                       goto err_buf;
+               }
+
+               memcpy(page_address(page) + page_off,
+                      page_address(p) + off, buflen);
+               page_off += buflen;
+               put_page(p);
+       }
+
+       /* Headroom does not contribute to packet length */
+       *len = page_off - VIRTIO_XDP_HEADROOM;
+       return page;
+err_buf:
+       __free_pages(page, 0);
+       return NULL;
+}
+
 static struct sk_buff *receive_small(struct net_device *dev,
                                     struct virtnet_info *vi,
                                     struct receive_queue *rq,
-                                    void *buf, unsigned int len)
+                                    void *buf, void *ctx,
+                                    unsigned int len)
 {
        struct sk_buff *skb;
        struct bpf_prog *xdp_prog;
-       unsigned int xdp_headroom = virtnet_get_headroom(vi);
+       unsigned int xdp_headroom = (unsigned long)ctx;
        unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
        unsigned int headroom = vi->hdr_len + header_offset;
        unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
                              SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+       struct page *page = virt_to_head_page(buf);
        unsigned int delta = 0;
+       struct page *xdp_page;
        len -= vi->hdr_len;
 
        rcu_read_lock();
@@ -416,6 +499,27 @@ static struct sk_buff *receive_small(struct net_device *dev,
                if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
                        goto err_xdp;
 
+               if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
+                       int offset = buf - page_address(page) + header_offset;
+                       unsigned int tlen = len + vi->hdr_len;
+                       u16 num_buf = 1;
+
+                       xdp_headroom = virtnet_get_headroom(vi);
+                       header_offset = VIRTNET_RX_PAD + xdp_headroom;
+                       headroom = vi->hdr_len + header_offset;
+                       buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
+                                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+                       xdp_page = xdp_linearize_page(rq, &num_buf, page,
+                                                     offset, header_offset,
+                                                     &tlen);
+                       if (!xdp_page)
+                               goto err_xdp;
+
+                       buf = page_address(xdp_page);
+                       put_page(page);
+                       page = xdp_page;
+               }
+
                xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
                xdp.data = xdp.data_hard_start + xdp_headroom;
                xdp.data_end = xdp.data + len;
@@ -444,7 +548,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 
        skb = build_skb(buf, buflen);
        if (!skb) {
-               put_page(virt_to_head_page(buf));
+               put_page(page);
                goto err;
        }
        skb_reserve(skb, headroom - delta);
@@ -460,7 +564,7 @@ err:
 err_xdp:
        rcu_read_unlock();
        dev->stats.rx_dropped++;
-       put_page(virt_to_head_page(buf));
+       put_page(page);
 xdp_xmit:
        return NULL;
 }
@@ -485,66 +589,6 @@ err:
        return NULL;
 }
 
-/* The conditions to enable XDP should preclude the underlying device from
- * sending packets across multiple buffers (num_buf > 1). However per spec
- * it does not appear to be illegal to do so but rather just against convention.
- * So in order to avoid making a system unresponsive the packets are pushed
- * into a page and the XDP program is run. This will be extremely slow and we
- * push a warning to the user to fix this as soon as possible. Fixing this may
- * require resolving the underlying hardware to determine why multiple buffers
- * are being received or simply loading the XDP program in the ingress stack
- * after the skb is built because there is no advantage to running it here
- * anymore.
- */
-static struct page *xdp_linearize_page(struct receive_queue *rq,
-                                      u16 *num_buf,
-                                      struct page *p,
-                                      int offset,
-                                      unsigned int *len)
-{
-       struct page *page = alloc_page(GFP_ATOMIC);
-       unsigned int page_off = VIRTIO_XDP_HEADROOM;
-
-       if (!page)
-               return NULL;
-
-       memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
-       page_off += *len;
-
-       while (--*num_buf) {
-               unsigned int buflen;
-               void *buf;
-               int off;
-
-               buf = virtqueue_get_buf(rq->vq, &buflen);
-               if (unlikely(!buf))
-                       goto err_buf;
-
-               p = virt_to_head_page(buf);
-               off = buf - page_address(p);
-
-               /* guard against a misconfigured or uncooperative backend that
-                * is sending packet larger than the MTU.
-                */
-               if ((page_off + buflen) > PAGE_SIZE) {
-                       put_page(p);
-                       goto err_buf;
-               }
-
-               memcpy(page_address(page) + page_off,
-                      page_address(p) + off, buflen);
-               page_off += buflen;
-               put_page(p);
-       }
-
-       /* Headroom does not contribute to packet length */
-       *len = page_off - VIRTIO_XDP_HEADROOM;
-       return page;
-err_buf:
-       __free_pages(page, 0);
-       return NULL;
-}
-
 static struct sk_buff *receive_mergeable(struct net_device *dev,
                                         struct virtnet_info *vi,
                                         struct receive_queue *rq,
@@ -559,6 +603,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        struct sk_buff *head_skb, *curr_skb;
        struct bpf_prog *xdp_prog;
        unsigned int truesize;
+       unsigned int headroom = mergeable_ctx_to_headroom(ctx);
 
        head_skb = NULL;
 
@@ -571,10 +616,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                u32 act;
 
                /* This happens when rx buffer size is underestimated */
-               if (unlikely(num_buf > 1)) {
+               if (unlikely(num_buf > 1 ||
+                            headroom < virtnet_get_headroom(vi))) {
                        /* linearize data for XDP */
                        xdp_page = xdp_linearize_page(rq, &num_buf,
-                                                     page, offset, &len);
+                                                     page, offset,
+                                                     VIRTIO_XDP_HEADROOM,
+                                                     &len);
                        if (!xdp_page)
                                goto err_xdp;
                        offset = VIRTIO_XDP_HEADROOM;
@@ -639,13 +687,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        }
        rcu_read_unlock();
 
-       if (unlikely(len > (unsigned long)ctx)) {
+       truesize = mergeable_ctx_to_truesize(ctx);
+       if (unlikely(len > truesize)) {
                pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
                         dev->name, len, (unsigned long)ctx);
                dev->stats.rx_length_errors++;
                goto err_skb;
        }
-       truesize = (unsigned long)ctx;
+
        head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
        curr_skb = head_skb;
 
@@ -665,13 +714,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                }
 
                page = virt_to_head_page(buf);
-               if (unlikely(len > (unsigned long)ctx)) {
+
+               truesize = mergeable_ctx_to_truesize(ctx);
+               if (unlikely(len > truesize)) {
                        pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
                                 dev->name, len, (unsigned long)ctx);
                        dev->stats.rx_length_errors++;
                        goto err_skb;
                }
-               truesize = (unsigned long)ctx;
 
                num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
                if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
@@ -754,7 +804,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
        else if (vi->big_packets)
                skb = receive_big(dev, vi, rq, buf, len);
        else
-               skb = receive_small(dev, vi, rq, buf, len);
+               skb = receive_small(dev, vi, rq, buf, ctx, len);
 
        if (unlikely(!skb))
                return 0;
@@ -787,12 +837,18 @@ frame_err:
        return 0;
 }
 
+/* Unlike mergeable buffers, all buffers are allocated to the
+ * same size, except for the headroom. For this reason we do
+ * not need to use mergeable_len_to_ctx here - it is enough
+ * to store the headroom as the context ignoring the truesize.
+ */
 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
                             gfp_t gfp)
 {
        struct page_frag *alloc_frag = &rq->alloc_frag;
        char *buf;
        unsigned int xdp_headroom = virtnet_get_headroom(vi);
+       void *ctx = (void *)(unsigned long)xdp_headroom;
        int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
        int err;
 
@@ -806,10 +862,9 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
        alloc_frag->offset += len;
        sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
                    vi->hdr_len + GOOD_PACKET_LEN);
-       err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
+       err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
        if (err < 0)
                put_page(virt_to_head_page(buf));
-
        return err;
 }
 
@@ -889,7 +944,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 
        buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
        buf += headroom; /* advance address leaving hole at front of pkt */
-       ctx = (void *)(unsigned long)len;
+       ctx = mergeable_len_to_ctx(len, headroom);
        get_page(alloc_frag->page);
        alloc_frag->offset += len + headroom;
        hole = alloc_frag->size - alloc_frag->offset;
@@ -1015,7 +1070,7 @@ static int virtnet_receive(struct receive_queue *rq, int budget)
        void *buf;
        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 
-       if (vi->mergeable_rx_bufs) {
+       if (!vi->big_packets || vi->mergeable_rx_bufs) {
                void *ctx;
 
                while (received < budget &&
@@ -1814,7 +1869,6 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 }
 
 static int init_vqs(struct virtnet_info *vi);
-static void _remove_vq_common(struct virtnet_info *vi);
 
 static int virtnet_restore_up(struct virtio_device *vdev)
 {
@@ -1843,39 +1897,6 @@ static int virtnet_restore_up(struct virtio_device *vdev)
        return err;
 }
 
-static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
-{
-       struct virtio_device *dev = vi->vdev;
-       int ret;
-
-       virtio_config_disable(dev);
-       dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
-       virtnet_freeze_down(dev);
-       _remove_vq_common(vi);
-
-       virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
-       virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
-
-       ret = virtio_finalize_features(dev);
-       if (ret)
-               goto err;
-
-       vi->xdp_queue_pairs = xdp_qp;
-       ret = virtnet_restore_up(dev);
-       if (ret)
-               goto err;
-       ret = _virtnet_set_queues(vi, curr_qp);
-       if (ret)
-               goto err;
-
-       virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
-       virtio_config_enable(dev);
-       return 0;
-err:
-       virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
-       return ret;
-}
-
 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                           struct netlink_ext_ack *extack)
 {
@@ -1922,35 +1943,29 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                        return PTR_ERR(prog);
        }
 
-       /* Changing the headroom in buffers is a disruptive operation because
-        * existing buffers must be flushed and reallocated. This will happen
-        * when a xdp program is initially added or xdp is disabled by removing
-        * the xdp program resulting in number of XDP queues changing.
-        */
-       if (vi->xdp_queue_pairs != xdp_qp) {
-               err = virtnet_reset(vi, curr_qp + xdp_qp, xdp_qp);
-               if (err) {
-                       dev_warn(&dev->dev, "XDP reset failure.\n");
-                       goto virtio_reset_err;
-               }
-       }
+       /* Make sure NAPI is not using any XDP TX queues for RX. */
+       for (i = 0; i < vi->max_queue_pairs; i++)
+               napi_disable(&vi->rq[i].napi);
 
        netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+       err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
+       if (err)
+               goto err;
+       vi->xdp_queue_pairs = xdp_qp;
 
        for (i = 0; i < vi->max_queue_pairs; i++) {
                old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
                rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
                if (old_prog)
                        bpf_prog_put(old_prog);
+               virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
        }
 
        return 0;
 
-virtio_reset_err:
-       /* On reset error do our best to unwind XDP changes inflight and return
-        * error up to user space for resolution. The underlying reset hung on
-        * us so not much we can do here.
-        */
+err:
+       for (i = 0; i < vi->max_queue_pairs; i++)
+               virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
        if (prog)
                bpf_prog_sub(prog, vi->max_queue_pairs - 1);
        return err;
@@ -2183,7 +2198,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
        names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
        if (!names)
                goto err_names;
-       if (vi->mergeable_rx_bufs) {
+       if (!vi->big_packets || vi->mergeable_rx_bufs) {
                ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
                if (!ctx)
                        goto err_ctx;
@@ -2429,7 +2444,7 @@ static int virtnet_probe(struct virtio_device *vdev)
                        dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
 
                if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
-                       dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO
+                       dev->hw_features |= NETIF_F_TSO
                                | NETIF_F_TSO_ECN | NETIF_F_TSO6;
                }
                /* Individual feature bits: what can host handle? */
@@ -2439,13 +2454,11 @@ static int virtnet_probe(struct virtio_device *vdev)
                        dev->hw_features |= NETIF_F_TSO6;
                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
                        dev->hw_features |= NETIF_F_TSO_ECN;
-               if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
-                       dev->hw_features |= NETIF_F_UFO;
 
                dev->features |= NETIF_F_GSO_ROBUST;
 
                if (gso)
-                       dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO);
+                       dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
                /* (!csum && gso) case will be fixed by register_netdev() */
        }
        if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
@@ -2598,15 +2611,6 @@ free:
        return err;
 }
 
-static void _remove_vq_common(struct virtnet_info *vi)
-{
-       vi->vdev->config->reset(vi->vdev);
-       free_unused_bufs(vi);
-       _free_receive_bufs(vi);
-       free_receive_page_frags(vi);
-       virtnet_del_vqs(vi);
-}
-
 static void remove_vq_common(struct virtnet_info *vi)
 {
        vi->vdev->config->reset(vi->vdev);