Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
drivers/net/virtio_net.c
index 3b3eebad39772c9160098f20e32b278dd6e7127b..d934774e9733bc1077c05f026127ea143fdc9f8f 100644 (file)
@@ -135,6 +135,9 @@ struct send_queue {
        struct virtnet_sq_stats stats;
 
        struct napi_struct napi;
+
+       /* Record whether sq is in reset state. */
+       bool reset;
 };
 
 /* Internal representation of a receive virtqueue */
@@ -267,6 +270,12 @@ struct virtnet_info {
        u8 duplex;
        u32 speed;
 
+       /* Interrupt coalescing settings */
+       u32 tx_usecs;
+       u32 rx_usecs;
+       u32 tx_max_packets;
+       u32 rx_max_packets;
+
        unsigned long guest_offloads;
        unsigned long guest_offloads_capable;
 
@@ -284,6 +293,9 @@ struct padded_vnet_hdr {
        char padding[12];
 };
 
+static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
+static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
+
 static bool is_xdp_frame(void *ptr)
 {
        return (unsigned long)ptr & VIRTIO_XDP_FLAG;
@@ -1628,6 +1640,11 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
                return;
 
        if (__netif_tx_trylock(txq)) {
+               if (sq->reset) {
+                       __netif_tx_unlock(txq);
+                       return;
+               }
+
                do {
                        virtqueue_disable_cb(sq->vq);
                        free_old_xmit_skbs(sq, true);
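
sq->reset (added above in struct send_queue) is only read or written while the tx queue lock is held, which keeps this rx-side tx cleaner away from a virtqueue that is mid-resize. An illustrative interleaving, assuming the virtnet_tx_resize() helper introduced below (a sketch, not part of the patch):

/*
 *   CPU0: virtnet_tx_resize()           CPU1: virtnet_poll_cleantx()
 *
 *   __netif_tx_lock_bh(txq);
 *   sq->reset = true;
 *   netif_stop_subqueue(dev, qindex);
 *   __netif_tx_unlock_bh(txq);
 *                                       __netif_tx_trylock(txq) succeeds,
 *                                       sees sq->reset == true,
 *                                       unlocks and returns without
 *                                       touching sq->vq
 *   virtqueue_resize(sq->vq, ...)       cannot race with the cleaner
 */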
@@ -1875,6 +1892,70 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
        return NETDEV_TX_OK;
 }
 
+static int virtnet_rx_resize(struct virtnet_info *vi,
+                            struct receive_queue *rq, u32 ring_num)
+{
+       bool running = netif_running(vi->dev);
+       int err, qindex;
+
+       qindex = rq - vi->rq;
+
+       if (running)
+               napi_disable(&rq->napi);
+
+       err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
+       if (err)
+               netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
+
+       if (!try_fill_recv(vi, rq, GFP_KERNEL))
+               schedule_delayed_work(&vi->refill, 0);
+
+       if (running)
+               virtnet_napi_enable(rq->vq, &rq->napi);
+       return err;
+}
+
+static int virtnet_tx_resize(struct virtnet_info *vi,
+                            struct send_queue *sq, u32 ring_num)
+{
+       bool running = netif_running(vi->dev);
+       struct netdev_queue *txq;
+       int err, qindex;
+
+       qindex = sq - vi->sq;
+
+       if (running)
+               virtnet_napi_tx_disable(&sq->napi);
+
+       txq = netdev_get_tx_queue(vi->dev, qindex);
+
+       /* 1. wait for all in-flight xmit to complete
+        * 2. close the race between netif_stop_subqueue() and
+        *    netif_start_subqueue()
+        */
+       __netif_tx_lock_bh(txq);
+
+       /* Prevent rx poll from accessing sq. */
+       sq->reset = true;
+
+       /* Prevent the upper layer from trying to send packets. */
+       netif_stop_subqueue(vi->dev, qindex);
+
+       __netif_tx_unlock_bh(txq);
+
+       err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
+       if (err)
+               netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
+
+       __netif_tx_lock_bh(txq);
+       sq->reset = false;
+       netif_tx_wake_queue(txq);
+       __netif_tx_unlock_bh(txq);
+
+       if (running)
+               virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
+       return err;
+}
+
 /*
  * Send command via the control virtqueue and check status.  Commands
  * supported by the hypervisor, as indicated by feature bits, should
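
Both resize helpers rely on virtqueue_resize() from the companion virtio_ring patches. Its assumed contract (the prototype below comes from that series, not this file): the core detaches every buffer still queued, passes each one to the recycle callback, then re-creates the ring with the requested size. The virtnet_{rq,sq}_free_unused_buf() callbacks declared near the top of this file double as those recycle hooks, and the same functions are reused by free_unused_bufs() later in this patch.

/* Companion virtio_ring API (assumed, not part of this diff). */
int virtqueue_resize(struct virtqueue *vq, u32 num,
		     void (*recycle)(struct virtqueue *vq, void *buf));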
@@ -2285,10 +2366,57 @@ static void virtnet_get_ringparam(struct net_device *dev,
 {
        struct virtnet_info *vi = netdev_priv(dev);
 
-       ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
-       ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
-       ring->rx_pending = ring->rx_max_pending;
-       ring->tx_pending = ring->tx_max_pending;
+       ring->rx_max_pending = vi->rq[0].vq->num_max;
+       ring->tx_max_pending = vi->sq[0].vq->num_max;
+       ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
+       ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
+}
+
+static int virtnet_set_ringparam(struct net_device *dev,
+                                struct ethtool_ringparam *ring,
+                                struct kernel_ethtool_ringparam *kernel_ring,
+                                struct netlink_ext_ack *extack)
+{
+       struct virtnet_info *vi = netdev_priv(dev);
+       u32 rx_pending, tx_pending;
+       struct receive_queue *rq;
+       struct send_queue *sq;
+       int i, err;
+
+       if (ring->rx_mini_pending || ring->rx_jumbo_pending)
+               return -EINVAL;
+
+       rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
+       tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
+
+       if (ring->rx_pending == rx_pending &&
+           ring->tx_pending == tx_pending)
+               return 0;
+
+       if (ring->rx_pending > vi->rq[0].vq->num_max)
+               return -EINVAL;
+
+       if (ring->tx_pending > vi->sq[0].vq->num_max)
+               return -EINVAL;
+
+       for (i = 0; i < vi->max_queue_pairs; i++) {
+               rq = vi->rq + i;
+               sq = vi->sq + i;
+
+               if (ring->tx_pending != tx_pending) {
+                       err = virtnet_tx_resize(vi, sq, ring->tx_pending);
+                       if (err)
+                               return err;
+               }
+
+               if (ring->rx_pending != rx_pending) {
+                       err = virtnet_rx_resize(vi, rq, ring->rx_pending);
+                       if (err)
+                               return err;
+               }
+       }
+
+       return 0;
 }
 
 static bool virtnet_commit_rss_command(struct virtnet_info *vi)
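
With .set_ringparam wired up (see the ethtool_ops change below), the rings can be resized from userspace with the stock tool, e.g. "ethtool -G eth0 rx 512 tx 512". A minimal C equivalent using the classic SIOCETHTOOL ioctl is sketched here; the interface name and ring sizes are placeholders:

#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

static int set_rings(const char *ifname, __u32 rx, __u32 tx)
{
	struct ethtool_ringparam erp = { .cmd = ETHTOOL_GRINGPARAM };
	struct ifreq ifr = { 0 };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&erp;

	/* Read current and max first; virtnet_set_ringparam() rejects
	 * anything above vq->num_max with -EINVAL.
	 */
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		goto out_err;

	erp.cmd = ETHTOOL_SRINGPARAM;
	erp.rx_pending = rx;	/* rx_mini/rx_jumbo must stay 0 */
	erp.tx_pending = tx;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		goto out_err;

	close(fd);
	return 0;

out_err:
	close(fd);
	return -1;
}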
@@ -2618,27 +2746,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
        return 0;
 }
 
+static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
+                                      struct ethtool_coalesce *ec)
+{
+       struct scatterlist sgs_tx, sgs_rx;
+       struct virtio_net_ctrl_coal_tx coal_tx;
+       struct virtio_net_ctrl_coal_rx coal_rx;
+
+       coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
+       coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
+       sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
+
+       if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
+                                 VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
+                                 &sgs_tx))
+               return -EINVAL;
+
+       /* Save parameters */
+       vi->tx_usecs = ec->tx_coalesce_usecs;
+       vi->tx_max_packets = ec->tx_max_coalesced_frames;
+
+       coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
+       coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
+       sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
+
+       if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
+                                 VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
+                                 &sgs_rx))
+               return -EINVAL;
+
+       /* Save parameters */
+       vi->rx_usecs = ec->rx_coalesce_usecs;
+       vi->rx_max_packets = ec->rx_max_coalesced_frames;
+
+       return 0;
+}
+
+static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
+{
+       /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
+        * feature is negotiated.
+        */
+       if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
+               return -EOPNOTSUPP;
+
+       if (ec->tx_max_coalesced_frames > 1 ||
+           ec->rx_max_coalesced_frames != 1)
+               return -EINVAL;
+
+       return 0;
+}
+
 static int virtnet_set_coalesce(struct net_device *dev,
                                struct ethtool_coalesce *ec,
                                struct kernel_ethtool_coalesce *kernel_coal,
                                struct netlink_ext_ack *extack)
 {
        struct virtnet_info *vi = netdev_priv(dev);
-       int i, napi_weight;
-
-       if (ec->tx_max_coalesced_frames > 1 ||
-           ec->rx_max_coalesced_frames != 1)
-               return -EINVAL;
+       int ret, i, napi_weight;
+       bool update_napi = false;
 
+       /* Can't change NAPI weight while the device is up */
        napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
        if (napi_weight ^ vi->sq[0].napi.weight) {
                if (dev->flags & IFF_UP)
                        return -EBUSY;
+               else
+                       update_napi = true;
+       }
+
+       if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
+               ret = virtnet_send_notf_coal_cmds(vi, ec);
+       else
+               ret = virtnet_coal_params_supported(ec);
+
+       if (ret)
+               return ret;
+
+       if (update_napi) {
                for (i = 0; i < vi->max_queue_pairs; i++)
                        vi->sq[i].napi.weight = napi_weight;
        }
 
-       return 0;
+       return ret;
 }
 
 static int virtnet_get_coalesce(struct net_device *dev,
@@ -2646,16 +2836,19 @@ static int virtnet_get_coalesce(struct net_device *dev,
                                struct kernel_ethtool_coalesce *kernel_coal,
                                struct netlink_ext_ack *extack)
 {
-       struct ethtool_coalesce ec_default = {
-               .cmd = ETHTOOL_GCOALESCE,
-               .rx_max_coalesced_frames = 1,
-       };
        struct virtnet_info *vi = netdev_priv(dev);
 
-       memcpy(ec, &ec_default, sizeof(ec_default));
+       if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
+               ec->rx_coalesce_usecs = vi->rx_usecs;
+               ec->tx_coalesce_usecs = vi->tx_usecs;
+               ec->tx_max_coalesced_frames = vi->tx_max_packets;
+               ec->rx_max_coalesced_frames = vi->rx_max_packets;
+       } else {
+               ec->rx_max_coalesced_frames = 1;
 
-       if (vi->sq[0].napi.weight)
-               ec->tx_max_coalesced_frames = 1;
+               if (vi->sq[0].napi.weight)
+                       ec->tx_max_coalesced_frames = 1;
+       }
 
        return 0;
 }
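
The coalescing knobs are driven the same way, e.g. "ethtool -C eth0 rx-usecs 8 tx-frames 64", read back with "ethtool -c eth0". A payload sketch, reusing the ifreq/SIOCETHTOOL pattern from the ringparam example above; the values are arbitrary:

struct ethtool_coalesce ec = {
	.cmd = ETHTOOL_SCOALESCE,
	.rx_coalesce_usecs = 8,
	.rx_max_coalesced_frames = 64,
	.tx_coalesce_usecs = 8,
	.tx_max_coalesced_frames = 64,
};

/* Without VIRTIO_NET_F_NOTF_COAL, both *_coalesce_usecs fields must be 0
 * (virtnet_coal_params_supported() returns -EOPNOTSUPP otherwise), and only
 * rx-frames == 1 with tx-frames 0 or 1 (NAPI weight off/on) is accepted.
 */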
@@ -2774,10 +2967,12 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
 }
 
 static const struct ethtool_ops virtnet_ethtool_ops = {
-       .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
+       .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
+               ETHTOOL_COALESCE_USECS,
        .get_drvinfo = virtnet_get_drvinfo,
        .get_link = ethtool_op_get_link,
        .get_ringparam = virtnet_get_ringparam,
+       .set_ringparam = virtnet_set_ringparam,
        .get_strings = virtnet_get_strings,
        .get_sset_count = virtnet_get_sset_count,
        .get_ethtool_stats = virtnet_get_ethtool_stats,
@@ -3171,6 +3366,27 @@ static void free_receive_page_frags(struct virtnet_info *vi)
                        put_page(vi->rq[i].alloc_frag.page);
 }
 
+static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
+{
+       if (!is_xdp_frame(buf))
+               dev_kfree_skb(buf);
+       else
+               xdp_return_frame(ptr_to_xdp(buf));
+}
+
+static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
+{
+       struct virtnet_info *vi = vq->vdev->priv;
+       int i = vq2rxq(vq);
+
+       if (vi->mergeable_rx_bufs)
+               put_page(virt_to_head_page(buf));
+       else if (vi->big_packets)
+               give_pages(&vi->rq[i], buf);
+       else
+               put_page(virt_to_head_page(buf));
+}
+
 static void free_unused_bufs(struct virtnet_info *vi)
 {
        void *buf;
@@ -3178,26 +3394,14 @@ static void free_unused_bufs(struct virtnet_info *vi)
 
        for (i = 0; i < vi->max_queue_pairs; i++) {
                struct virtqueue *vq = vi->sq[i].vq;
-               while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-                       if (!is_xdp_frame(buf))
-                               dev_kfree_skb(buf);
-                       else
-                               xdp_return_frame(ptr_to_xdp(buf));
-               }
+               while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+                       virtnet_sq_free_unused_buf(vq, buf);
        }
 
        for (i = 0; i < vi->max_queue_pairs; i++) {
                struct virtqueue *vq = vi->rq[i].vq;
-
-               while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-                       if (vi->mergeable_rx_bufs) {
-                               put_page(virt_to_head_page(buf));
-                       } else if (vi->big_packets) {
-                               give_pages(&vi->rq[i], buf);
-                       } else {
-                               put_page(virt_to_head_page(buf));
-                       }
-               }
+               while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+                       virtnet_rq_free_unused_buf(vq, buf);
        }
 }
 
@@ -3228,6 +3432,29 @@ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqu
                   (unsigned int)GOOD_PACKET_LEN);
 }
 
+static void virtnet_config_sizes(struct virtnet_info *vi, u32 *sizes)
+{
+       u32 i, rx_size, tx_size;
+
+       if (vi->speed == SPEED_UNKNOWN || vi->speed < SPEED_10000) {
+               rx_size = 1024;
+               tx_size = 1024;
+       } else if (vi->speed < SPEED_40000) {
+               rx_size = 1024 * 4;
+               tx_size = 1024 * 4;
+       } else {
+               rx_size = 1024 * 8;
+               tx_size = 1024 * 8;
+       }
+
+       for (i = 0; i < vi->max_queue_pairs; i++) {
+               sizes[rxq2vq(i)] = rx_size;
+               sizes[txq2vq(i)] = tx_size;
+       }
+}
+
 static int virtnet_find_vqs(struct virtnet_info *vi)
 {
        vq_callback_t **callbacks;
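
For reference, the defaults chosen by virtnet_config_sizes() above; the virtio core may still clamp a request to the device's advertised maximum (vq->num_max):

/*
 *   vi->speed                        default rx/tx ring size
 *   unknown or below 10 Gbit/s       1024
 *   10 Gbit/s up to 40 Gbit/s        4096
 *   40 Gbit/s and above              8192
 */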
@@ -3235,6 +3462,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
        int ret = -ENOMEM;
        int i, total_vqs;
        const char **names;
+       u32 *sizes;
        bool *ctx;
 
        /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
@@ -3262,10 +3490,15 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
                ctx = NULL;
        }
 
+       sizes = kmalloc_array(total_vqs, sizeof(*sizes), GFP_KERNEL);
+       if (!sizes)
+               goto err_sizes;
+
        /* Parameters for control virtqueue, if any */
        if (vi->has_cvq) {
                callbacks[total_vqs - 1] = NULL;
                names[total_vqs - 1] = "control";
+               sizes[total_vqs - 1] = 64;
        }
 
        /* Allocate/initialize parameters for send/receive virtqueues */
@@ -3280,8 +3513,10 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
                        ctx[rxq2vq(i)] = true;
        }
 
-       ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks,
-                                 names, ctx, NULL);
+       virtnet_config_sizes(vi, sizes);
+
+       ret = virtio_find_vqs_ctx_size(vi->vdev, total_vqs, vqs, callbacks,
+                                      names, sizes, ctx, NULL);
        if (ret)
                goto err_find;
 
@@ -3301,6 +3536,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 
 
 err_find:
+       kfree(sizes);
+err_sizes:
        kfree(ctx);
 err_ctx:
        kfree(names);
@@ -3444,6 +3681,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
             VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
                             "VIRTIO_NET_F_CTRL_VQ") ||
             VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
+                            "VIRTIO_NET_F_CTRL_VQ") ||
+            VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
                             "VIRTIO_NET_F_CTRL_VQ"))) {
                return false;
        }
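
VIRTIO_NET_F_NOTF_COAL joins the features that are unusable without a control virtqueue: the coalescing parameters are programmed through VIRTIO_NET_CTRL_NOTF_COAL commands, so negotiating it without VIRTIO_NET_F_CTRL_VQ would be inconsistent.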
@@ -3580,6 +3819,13 @@ static int virtnet_probe(struct virtio_device *vdev)
        if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
                vi->mergeable_rx_bufs = true;
 
+       if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
+               vi->rx_usecs = 0;
+               vi->tx_usecs = 0;
+               vi->tx_max_packets = 0;
+               vi->rx_max_packets = 0;
+       }
+
        if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
                vi->has_rss_hash_report = true;
 
@@ -3651,6 +3897,9 @@ static int virtnet_probe(struct virtio_device *vdev)
                vi->curr_queue_pairs = num_online_cpus();
        vi->max_queue_pairs = max_queue_pairs;
 
+       virtnet_init_settings(dev);
+       virtnet_update_settings(vi);
+
        /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
        err = init_vqs(vi);
        if (err)
@@ -3663,8 +3912,6 @@ static int virtnet_probe(struct virtio_device *vdev)
        netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
        netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
 
-       virtnet_init_settings(dev);
-
        if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
                vi->failover = net_failover_create(vi->dev);
                if (IS_ERR(vi->failover)) {
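
The removal of virtnet_init_settings() here pairs with the earlier hunk that calls it (together with virtnet_update_settings()) before init_vqs(): vi->speed must already be populated when virtnet_config_sizes() picks the default ring sizes.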
@@ -3814,7 +4061,7 @@ static struct virtio_device_id id_table[] = {
        VIRTIO_NET_F_CTRL_MAC_ADDR, \
        VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
        VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
-       VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT
+       VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
 
 static unsigned int features[] = {
        VIRTNET_FEATURES,