sk_buff: drop all skb extensions on free and skb scrubbing
diff --git a/net/core/dev.c b/net/core/dev.c
index 5156c0edebe8097403c6b78f711df5a373fd2e6d..bf3ed413abafe121d8c20ea4ca023ea770c111cc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3467,18 +3467,22 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
        qdisc_calculate_pkt_len(skb, q);
 
        if (q->flags & TCQ_F_NOLOCK) {
-               if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-                       __qdisc_drop(skb, &to_free);
-                       rc = NET_XMIT_DROP;
-               } else if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
-                          qdisc_run_begin(q)) {
+               if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
+                   qdisc_run_begin(q)) {
+                       if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
+                                             &q->state))) {
+                               __qdisc_drop(skb, &to_free);
+                               rc = NET_XMIT_DROP;
+                               goto end_run;
+                       }
                        qdisc_bstats_cpu_update(q, skb);
 
+                       rc = NET_XMIT_SUCCESS;
                        if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
                                __qdisc_run(q);
 
+end_run:
                        qdisc_run_end(q);
-                       rc = NET_XMIT_SUCCESS;
                } else {
                        rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
                        qdisc_run(q);
@@ -3963,6 +3967,8 @@ int dev_weight_rx_bias __read_mostly = 1;  /* bias for backlog weight */
 int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
 int dev_rx_weight __read_mostly = 64;
 int dev_tx_weight __read_mostly = 64;
+/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
+int gro_normal_batch __read_mostly = 8;
 
 /* Called with irq disabled */
 static inline void ____napi_schedule(struct softnet_data *sd,
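
gro_normal_batch is the knob consulted by gro_normal_one() further down; in the full series it is also exported as a sysctl under net.core so the batch size can be tuned at runtime. A sketch of what such an entry in net/core/sysctl_net_core.c would look like (the entry itself is not part of this hunk; example_gro_table and example_min_batch are illustrative names):

/* Sketch (assumption): runtime tuning of the GRO_NORMAL batch size via
 * net.core.gro_normal_batch.  A floor of 1 keeps gro_normal_one() from
 * queueing skbs that would never be flushed by the batch check.
 */
static int example_min_batch = 1;

static struct ctl_table example_gro_table[] = {
        {
                .procname       = "gro_normal_batch",
                .data           = &gro_normal_batch,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &example_min_batch,
        },
        { }
};

With an entry like this, sysctl -w net.core.gro_normal_batch=16 trades a little per-packet latency for fewer calls into netif_receive_skb_list_internal().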
@@ -5486,7 +5492,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
        skb->data_len -= grow;
        skb->tail += grow;
 
-       pinfo->frags[0].page_offset += grow;
+       skb_frag_off_add(&pinfo->frags[0], grow);
        skb_frag_size_sub(&pinfo->frags[0], grow);
 
        if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
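
The open-coded page_offset arithmetic becomes skb_frag_off_add(), part of the skb_frag_t accessor conversion, so gro_pull_from_frag0() no longer depends on how the fragment offset is stored. Roughly, the accessor pair looks like the sketch below (defined in include/linux/skbuff.h, not in this diff; the bv_offset field name assumes the conversion of skb_frag_t to struct bio_vec):

/* Sketch of the fragment-offset accessors used above. */
static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
{
        frag->bv_offset += delta;       /* was page_offset before the conversion */
}

static inline unsigned int skb_frag_off(const skb_frag_t *frag)
{
        return frag->bv_offset;
}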
@@ -5660,7 +5666,7 @@ EXPORT_SYMBOL(gro_find_complete_by_type);
 static void napi_skb_free_stolen_head(struct sk_buff *skb)
 {
        skb_dst_drop(skb);
-       secpath_reset(skb);
+       skb_ext_put(skb);
        kmem_cache_free(skbuff_head_cache, skb);
 }
 
@@ -5727,7 +5733,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
        skb->encapsulation = 0;
        skb_shinfo(skb)->gso_type = 0;
        skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
-       secpath_reset(skb);
+       skb_ext_reset(skb);
 
        napi->skb = skb;
 }
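
Together, these two hunks are what the commit title describes: napi_skb_free_stolen_head() and napi_reuse_skb() used to reset only the secpath, so any other attached extension (bridge netfilter state, for instance) survived the free or the scrub. skb_ext_put() drops the reference on the whole extension area before the head is freed, while skb_ext_reset() additionally clears the bookkeeping because the skb is about to be reused. Roughly, assuming the skb_ext helpers in include/linux/skbuff.h (not shown in this diff):

/* Sketch of the extension helpers used above. */
static inline void skb_ext_put(struct sk_buff *skb)
{
        if (skb->active_extensions)
                __skb_ext_put(skb->extensions); /* head is freed right after */
}

static inline void skb_ext_reset(struct sk_buff *skb)
{
        if (unlikely(skb->active_extensions)) {
                __skb_ext_put(skb->extensions);
                skb->active_extensions = 0;     /* skb will be reused */
        }
}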
@@ -5747,6 +5753,26 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+       if (!napi->rx_count)
+               return;
+       netif_receive_skb_list_internal(&napi->rx_list);
+       INIT_LIST_HEAD(&napi->rx_list);
+       napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing.  If batch size exceeded,
+ * pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+       list_add_tail(&skb->list, &napi->rx_list);
+       if (++napi->rx_count >= gro_normal_batch)
+               gro_normal_list(napi);
+}
+
 static gro_result_t napi_frags_finish(struct napi_struct *napi,
                                      struct sk_buff *skb,
                                      gro_result_t ret)
@@ -5756,8 +5782,8 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
        case GRO_HELD:
                __skb_push(skb, ETH_HLEN);
                skb->protocol = eth_type_trans(skb, skb->dev);
-               if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
-                       ret = GRO_DROP;
+               if (ret == GRO_NORMAL)
+                       gro_normal_one(napi, skb);
                break;
 
        case GRO_DROP:
@@ -6034,6 +6060,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
                                 NAPIF_STATE_IN_BUSY_POLL)))
                return false;
 
+       gro_normal_list(n);
+
        if (n->gro_bitmask) {
                unsigned long timeout = 0;
 
@@ -6119,10 +6147,19 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
         * Ideally, a new ndo_busy_poll_stop() could avoid another round.
         */
        rc = napi->poll(napi, BUSY_POLL_BUDGET);
+       /* We can't gro_normal_list() here, because napi->poll() might have
+        * rearmed the napi (napi_complete_done()) in which case it could
+        * already be running on another CPU.
+        */
        trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
        netpoll_poll_unlock(have_poll_lock);
-       if (rc == BUSY_POLL_BUDGET)
+       if (rc == BUSY_POLL_BUDGET) {
+               /* As the whole budget was spent, we still own the napi so can
+                * safely handle the rx_list.
+                */
+               gro_normal_list(napi);
                __napi_schedule(napi);
+       }
        local_bh_enable();
 }
 
@@ -6167,6 +6204,7 @@ restart:
                }
                work = napi_poll(napi, BUSY_POLL_BUDGET);
                trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
+               gro_normal_list(napi);
 count:
                if (work > 0)
                        __NET_ADD_STATS(dev_net(napi->dev),
@@ -6272,6 +6310,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
        napi->timer.function = napi_watchdog;
        init_gro_hash(napi);
        napi->skb = NULL;
+       INIT_LIST_HEAD(&napi->rx_list);
+       napi->rx_count = 0;
        napi->poll = poll;
        if (weight > NAPI_POLL_WEIGHT)
                netdev_err_once(dev, "%s() called with weight %d\n", __func__,
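
The two members initialised here are the batch list and its length counter, which are expected to sit in struct napi_struct next to the existing GRO state. An excerpt-style sketch of the corresponding declaration (it lives in include/linux/netdevice.h, not in this diff; the field names match the uses above):

struct napi_struct {
        /* ... existing members: state, weight, gro_hash, skb, poll, ... */
        struct list_head        rx_list;        /* GRO_NORMAL skbs awaiting list-RX */
        int                     rx_count;       /* current length of rx_list */
        /* ... */
};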
@@ -6368,6 +6408,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
                goto out_unlock;
        }
 
+       gro_normal_list(n);
+
        if (n->gro_bitmask) {
                /* flush too old packets
                 * If HZ < 1000, flush all packets.
@@ -8088,12 +8130,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                bpf_chk = generic_xdp_install;
 
        if (fd >= 0) {
+               u32 prog_id;
+
                if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
                        NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
                        return -EEXIST;
                }
-               if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-                   __dev_xdp_query(dev, bpf_op, query)) {
+
+               prog_id = __dev_xdp_query(dev, bpf_op, query);
+               if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
                        NL_SET_ERR_MSG(extack, "XDP program already attached");
                        return -EBUSY;
                }
@@ -8108,6 +8153,14 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                        bpf_prog_put(prog);
                        return -EINVAL;
                }
+
+               if (prog->aux->id == prog_id) {
+                       bpf_prog_put(prog);
+                       return 0;
+               }
+       } else {
+               if (!__dev_xdp_query(dev, bpf_op, query))
+                       return 0;
        }
 
        err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
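
Capturing prog_id up front makes dev_change_xdp_fd() idempotent: re-attaching the program that is already installed just drops the freshly taken reference and returns 0, and a detach request (fd < 0) when nothing is attached returns 0 as well instead of calling into the driver. __dev_xdp_query() supplies the id of the currently attached program, or 0 when there is none; roughly (a sketch of the helper, whose real version sits earlier in net/core/dev.c):

/* Rough sketch: ask the driver (or the generic XDP hook) which program
 * is attached for the given query command and report its id, 0 meaning
 * no program is attached.
 */
static u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
                           enum bpf_netdev_command cmd)
{
        struct netdev_bpf xdp;

        memset(&xdp, 0, sizeof(xdp));
        xdp.command = cmd;

        /* The query command is not expected to fail. */
        WARN_ON(bpf_op(dev, &xdp) < 0);

        return xdp.prog_id;
}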