Gao Xiang <xiang@kernel.org> <hsiangkao@aol.com>
Gao Xiang <xiang@kernel.org> <hsiangkao@linux.alibaba.com>
Gao Xiang <xiang@kernel.org> <hsiangkao@redhat.com>
- Geliang Tang <geliang.tang@linux.dev> <geliang.tang@suse.com>
- Geliang Tang <geliang.tang@linux.dev> <geliangtang@xiaomi.com>
- Geliang Tang <geliang.tang@linux.dev> <geliangtang@gmail.com>
- Geliang Tang <geliang.tang@linux.dev> <geliangtang@163.com>
+ Geliang Tang <geliang@kernel.org> <geliang.tang@linux.dev>
+ Geliang Tang <geliang@kernel.org> <geliang.tang@suse.com>
+ Geliang Tang <geliang@kernel.org> <geliangtang@xiaomi.com>
+ Geliang Tang <geliang@kernel.org> <geliangtang@gmail.com>
+ Geliang Tang <geliang@kernel.org> <geliangtang@163.com>
Georgi Djakov <djakov@kernel.org> <georgi.djakov@linaro.org>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@de.ibm.com>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <gerald.schaefer@de.ibm.com>
John Crispin <john@phrozen.org> <blogic@openwrt.org>
John Fastabend <john.fastabend@gmail.com> <john.r.fastabend@intel.com>
John Keeping <john@keeping.me.uk> <john@metanate.com>
+ John Moon <john@jmoon.dev> <quic_johmoo@quicinc.com>
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
John Stultz <johnstul@us.ibm.com>
<jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com>
Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
Leon Romanovsky <leon@kernel.org> <leonro@nvidia.com>
+ Leo Yan <leo.yan@linux.dev> <leo.yan@linaro.org>
Liam Mark <quic_lmark@quicinc.com> <lmark@codeaurora.org>
Linas Vepstas <linas@austin.ibm.com>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
Sricharan Ramabadhran <quic_srichara@quicinc.com> <sricharan@codeaurora.org>
Srinivas Ramana <quic_sramana@quicinc.com> <sramana@codeaurora.org>
Sriram R <quic_srirrama@quicinc.com> <srirrama@codeaurora.org>
+Stefan Wahren <wahrenst@gmx.net> <stefan.wahren@i2se.com>
Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
Stephen Hemminger <stephen@networkplumber.org> <shemminger@osdl.org>
if dpll lock-state was not DPLL_LOCK_STATUS_LOCKED_HO_ACQ, the
dpll's lock-state shall remain DPLL_LOCK_STATUS_UNLOCKED)
render-max: true
+ -
+ type: enum
+ name: lock-status-error
+ doc: |
+ if the previous status change was caused by a failure, this
+ provides information about the dpll device lock status error.
+ Valid values for the DPLL_A_LOCK_STATUS_ERROR attribute
+ entries:
+ -
+ name: none
+ doc: |
+ dpll device lock status was changed without any error
+ value: 1
+ -
+ name: undefined
+ doc: |
+ dpll device lock status was changed due to an undefined error.
+ The driver reports this value when it is not able to
+ determine the exact error type.
+ -
+ name: media-down
+ doc: |
+ dpll device lock status was changed because the associated
+ media went down.
+ This may happen for example if dpll device was previously
+ locked on an input pin of type PIN_TYPE_SYNCE_ETH_PORT.
+ -
+ name: fractional-frequency-offset-too-high
+ doc: |
+ the FFO (Fractional Frequency Offset) between the RX and TX
+ symbol rate on the media became too high.
+ This may happen for example if dpll device was previously
+ locked on an input pin of type PIN_TYPE_SYNCE_ETH_PORT.
+ render-max: true
-
type: const
name: temp-divider
name: type
type: u32
enum: type
+ -
+ name: lock-status-error
+ type: u32
+ enum: lock-status-error
-
name: pin
enum-name: dpll_a_pin
- mode
- mode-supported
- lock-status
+ - lock-status-error
- temp
- clock-id
- type
dump:
- pre: dpll-lock-dumpit
- post: dpll-unlock-dumpit
reply: *dev-attrs
-
- fractional-frequency-offset
dump:
- pre: dpll-lock-dumpit
- post: dpll-unlock-dumpit
request:
attributes:
- id
M: Daniel Borkmann <daniel@iogearbox.net>
M: Andrii Nakryiko <andrii@kernel.org>
R: Martin KaFai Lau <martin.lau@linux.dev>
+R: Eduard Zingerman <eddyz87@gmail.com>
R: Song Liu <song@kernel.org>
R: Yonghong Song <yonghong.song@linux.dev>
R: John Fastabend <john.fastabend@gmail.com>
BPF [LIBRARY] (libbpf)
M: Andrii Nakryiko <andrii@kernel.org>
+M: Eduard Zingerman <eddyz87@gmail.com>
L: bpf@vger.kernel.org
S: Maintained
F: tools/lib/bpf/
BPF [SELFTESTS] (Test Runners & Infrastructure)
M: Andrii Nakryiko <andrii@kernel.org>
+M: Eduard Zingerman <eddyz87@gmail.com>
R: Mykola Lysenko <mykolal@fb.com>
L: bpf@vger.kernel.org
S: Maintained
F: net/sched/sch_cake.c
CAN NETWORK DRIVERS
-M: Wolfgang Grandegger <wg@grandegger.com>
M: Marc Kleine-Budde <mkl@pengutronix.de>
+M: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
L: linux-can@vger.kernel.org
S: Maintained
W: https://github.com/linux-can
F: include/linux/errseq.h
F: lib/errseq.c
+ESD CAN NETWORK DRIVERS
+M: Stefan Mätje <stefan.maetje@esd.eu>
+R: socketcan@esd.eu
+L: linux-can@vger.kernel.org
+S: Maintained
+F: drivers/net/can/esd/
+
ESD CAN/USB DRIVERS
M: Frank Jungclaus <frank.jungclaus@esd.eu>
R: socketcan@esd.eu
INTEL GVT-g DRIVERS (Intel GPU Virtualization)
M: Zhenyu Wang <zhenyuw@linux.intel.com>
- M: Zhi Wang <zhi.a.wang@intel.com>
+ M: Zhi Wang <zhi.wang.linux@gmail.com>
L: intel-gvt-dev@lists.freedesktop.org
L: intel-gfx@lists.freedesktop.org
S: Supported
- W: https://01.org/igvt-g
+ W: https://github.com/intel/gvt-linux/wiki
T: git https://github.com/intel/gvt-linux.git
F: drivers/gpu/drm/i915/gvt/
S: Supported
F: drivers/net/ethernet/marvell/octeon_ep
+MARVELL OCTEON ENDPOINT VF DRIVER
+M: Veerasenareddy Burru <vburru@marvell.com>
+M: Sathesh Edara <sedara@marvell.com>
+M: Shinas Rasheed <srasheed@marvell.com>
+M: Satananda Burla <sburla@marvell.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/marvell/octeon_ep_vf
+
MARVELL OCTEONTX2 PHYSICAL FUNCTION DRIVER
M: Sunil Goutham <sgoutham@marvell.com>
M: Geetha sowjanya <gakula@marvell.com>
M: Jakub Kicinski <kuba@kernel.org>
S: Maintained
F: drivers/net/netdevsim/*
+F: tools/testing/selftests/drivers/net/netdevsim/*
NETEM NETWORK EMULATOR
M: Stephen Hemminger <stephen@networkplumber.org>
NETWORKING [MPTCP]
M: Matthieu Baerts <matttbe@kernel.org>
M: Mat Martineau <martineau@kernel.org>
- R: Geliang Tang <geliang.tang@linux.dev>
+ R: Geliang Tang <geliang@kernel.org>
L: netdev@vger.kernel.org
L: mptcp@lists.linux.dev
S: Maintained
R: Will Deacon <will@kernel.org>
R: James Clark <james.clark@arm.com>
R: Mike Leach <mike.leach@linaro.org>
- R: Leo Yan <leo.yan@linaro.org>
+ R: Leo Yan <leo.yan@linux.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: tools/build/feature/test-libopencsd.c
F: Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
F: drivers/net/wireless/ath/ath9k/
+QUALCOMM ATHEROS QCA7K ETHERNET DRIVER
+M: Stefan Wahren <wahrenst@gmx.net>
+L: netdev@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/net/qca,qca7000.txt
+F: drivers/net/ethernet/qualcomm/qca*
+
QUALCOMM BAM-DMUX WWAN NETWORK DRIVER
M: Stephan Gerhold <stephan@gerhold.net>
L: netdev@vger.kernel.org
XILINX CAN DRIVER
M: Appana Durga Kedareswara rao <appana.durga.rao@xilinx.com>
-R: Naga Sureshkumar Relli <naga.sureshkumar.relli@xilinx.com>
L: linux-can@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/can/xilinx,can.yaml
struct netlink_ext_ack *extack)
{
const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+ enum dpll_lock_status_error status_error = 0;
enum dpll_lock_status status;
int ret;
- ret = ops->lock_status_get(dpll, dpll_priv(dpll), &status, extack);
+ ret = ops->lock_status_get(dpll, dpll_priv(dpll), &status,
+ &status_error, extack);
if (ret)
return ret;
if (nla_put_u32(msg, DPLL_A_LOCK_STATUS, status))
return -EMSGSIZE;
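+ /* The failure cause is only reported when the device is unlocked or in holdover */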
+ if (status_error &&
+ (status == DPLL_LOCK_STATUS_UNLOCKED ||
+ status == DPLL_LOCK_STATUS_HOLDOVER) &&
+ nla_put_u32(msg, DPLL_A_LOCK_STATUS_ERROR, status_error))
+ return -EMSGSIZE;
return 0;
}
unsigned long i;
int ret = 0;
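+ /* Hold dpll_lock across the whole dump so the pin xarray cannot change underneath us */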
+ mutex_lock(&dpll_lock);
xa_for_each_marked_start(&dpll_pin_xa, i, pin, DPLL_REGISTERED,
ctx->idx) {
if (!dpll_pin_available(pin))
}
genlmsg_end(skb, hdr);
}
+ mutex_unlock(&dpll_lock);
+
if (ret == -EMSGSIZE) {
ctx->idx = i;
return skb->len;
unsigned long i;
int ret = 0;
+ mutex_lock(&dpll_lock);
xa_for_each_marked_start(&dpll_device_xa, i, dpll, DPLL_REGISTERED,
ctx->idx) {
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
}
genlmsg_end(skb, hdr);
}
+ mutex_unlock(&dpll_lock);
+
if (ret == -EMSGSIZE) {
ctx->idx = i;
return skb->len;
mutex_unlock(&dpll_lock);
}
- int dpll_lock_dumpit(struct netlink_callback *cb)
- {
- mutex_lock(&dpll_lock);
-
- return 0;
- }
-
- int dpll_unlock_dumpit(struct netlink_callback *cb)
- {
- mutex_unlock(&dpll_lock);
-
- return 0;
- }
-
int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info)
{
bond_for_each_slave(bond, slave, iter)
val &= slave->dev->xdp_features;
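+ /* AF_XDP zero-copy is not supported through the bond, so mask it out of the advertised features */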
+ val &= ~NETDEV_XDP_ACT_XSK_ZEROCOPY;
+
xdp_set_features_flag(bond_dev, val);
}
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
bond_dev->features |= BOND_XFRM_FEATURES;
#endif /* CONFIG_XFRM_OFFLOAD */
-
- if (bond_xdp_check(bond))
- bond_dev->xdp_features = NETDEV_XDP_ACT_MASK;
}
/* Destroy a bonding device.
params->ad_actor_sys_prio = ad_actor_sys_prio;
eth_zero_addr(params->ad_actor_system);
params->ad_user_port_key = ad_user_port_key;
+ params->coupled_control = 1;
if (packets_per_slave > 0) {
params->reciprocal_packets_per_slave =
reciprocal_value(packets_per_slave);
return 0;
}
-static void __net_exit bond_net_exit_batch(struct list_head *net_list)
+/* According to commit 69b0216ac255 ("bonding: fix bonding_masters
+ * race condition in bond unloading") we need to remove sysfs files
+ * before we remove our devices (done later in bond_net_exit_batch_rtnl())
+ */
+static void __net_exit bond_net_pre_exit(struct net *net)
+{
+ struct bond_net *bn = net_generic(net, bond_net_id);
+
+ bond_destroy_sysfs(bn);
+}
+
+static void __net_exit bond_net_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_kill_list)
{
struct bond_net *bn;
struct net *net;
- LIST_HEAD(list);
-
- list_for_each_entry(net, net_list, exit_list) {
- bn = net_generic(net, bond_net_id);
- bond_destroy_sysfs(bn);
- }
/* Kill off any bonds created after unregistering bond rtnl ops */
- rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
struct bonding *bond, *tmp_bond;
bn = net_generic(net, bond_net_id);
list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
- unregister_netdevice_queue(bond->dev, &list);
+ unregister_netdevice_queue(bond->dev, dev_kill_list);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
+}
+
+/* According to commit 23fa5c2caae0 ("bonding: destroy proc directory
+ * only after all bonds are gone") bond_destroy_proc_dir() is called
+ * after bond_net_exit_batch_rtnl() has completed.
+ */
+static void __net_exit bond_net_exit_batch(struct list_head *net_list)
+{
+ struct bond_net *bn;
+ struct net *net;
list_for_each_entry(net, net_list, exit_list) {
bn = net_generic(net, bond_net_id);
static struct pernet_operations bond_net_ops = {
.init = bond_net_init,
+ .pre_exit = bond_net_pre_exit,
+ .exit_batch_rtnl = bond_net_exit_batch_rtnl,
.exit_batch = bond_net_exit_batch,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
struct phy_device *phydev = dev->phydev;
u32 cmd_bits = 0, reg;
int changed = 0;
+ bool active;
if (intf->old_link != phydev->link) {
changed = 1;
reg |= cmd_bits;
umac_wl(intf, reg, UMC_CMD);
- intf->eee.eee_active = phy_init_eee(phydev, 0) >= 0;
- bcmasp_eee_enable_set(intf, intf->eee.eee_active);
+ active = phy_init_eee(phydev, 0) >= 0;
+ bcmasp_eee_enable_set(intf, active);
}
reg = rgmii_rl(intf, RGMII_OOB_CNTRL);
intf->rx_buf_order = get_order(RING_BUFFER_SIZE);
buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order);
+ if (!buffer_pg)
+ return -ENOMEM;
dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE,
DMA_FROM_DEVICE);
return 0;
err_reclaim_tx:
+ netif_napi_del(&intf->tx_napi);
bcmasp_reclaim_free_all_tx(intf);
err_phy_disconnect:
if (phydev)
return 0;
}
- static int npc_mcam_verify_pf_func(struct rvu *rvu,
- struct mcam_entry *entry_data, u8 intf,
- u16 pcifunc)
- {
- u16 pf_func, pf_func_mask;
-
- if (is_npc_intf_rx(intf))
- return 0;
-
- pf_func_mask = (entry_data->kw_mask[0] >> 32) &
- NPC_KEX_PF_FUNC_MASK;
- pf_func = (entry_data->kw[0] >> 32) & NPC_KEX_PF_FUNC_MASK;
-
- pf_func = be16_to_cpu((__force __be16)pf_func);
- if (pf_func_mask != NPC_KEX_PF_FUNC_MASK ||
- ((pf_func & ~RVU_PFVF_FUNC_MASK) !=
- (pcifunc & ~RVU_PFVF_FUNC_MASK)))
- return -EINVAL;
-
- return 0;
- }
-
void rvu_npc_set_pkind(struct rvu *rvu, int pkind, struct rvu_pfvf *pfvf)
{
int blkaddr;
owner = mcam->entry2pfvf_map[index];
target_func = (entry->action >> 4) & 0xffff;
/* do nothing when target is LBK/PF or owner is not PF */
- if (is_pffunc_af(owner) || is_afvf(target_func) ||
+ if (is_pffunc_af(owner) || is_lbk_vf(rvu, target_func) ||
(owner & RVU_PFVF_FUNC_MASK) ||
!(target_func & RVU_PFVF_FUNC_MASK))
return;
int blkaddr, index;
/* AF's and SDP VFs work in promiscuous mode */
- if (is_afvf(pcifunc) || is_sdp_vf(pcifunc))
+ if (is_lbk_vf(rvu, pcifunc) || is_sdp_vf(rvu, pcifunc))
return;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
return;
/* Skip LBK VFs */
- if (is_afvf(pcifunc))
+ if (is_lbk_vf(rvu, pcifunc))
return;
/* If pkt replication is not supported,
u16 vf_func;
/* Only CGX PF/VF can add allmulticast entry */
- if (is_afvf(pcifunc) && is_sdp_vf(pcifunc))
+ if (is_lbk_vf(rvu, pcifunc) && is_sdp_vf(rvu, pcifunc))
return;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
else
nix_intf = pfvf->nix_rx_intf;
- if (!is_pffunc_af(pcifunc) &&
- npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) {
- rc = NPC_MCAM_INVALID_REQ;
- goto exit;
- }
-
/* For AF installed rules, the nix_intf should be set to target NIX */
if (is_pffunc_af(req->hdr.pcifunc))
nix_intf = req->intf;
if (!is_npc_interface_valid(rvu, req->intf))
return NPC_MCAM_INVALID_REQ;
- if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf,
- req->hdr.pcifunc))
- return NPC_MCAM_INVALID_REQ;
-
/* Try to allocate a MCAM entry */
entry_req.hdr.pcifunc = req->hdr.pcifunc;
entry_req.contig = true;
enum mlx5_msees_oper_status oper_status;
bool ho_acq;
bool oper_freq_measure;
+ enum mlx5_msees_failure_reason failure_reason;
s32 frequency_diff;
};
synce_status->oper_status = MLX5_GET(msees_reg, out, oper_status);
synce_status->ho_acq = MLX5_GET(msees_reg, out, ho_acq);
synce_status->oper_freq_measure = MLX5_GET(msees_reg, out, oper_freq_measure);
+ synce_status->failure_reason = MLX5_GET(msees_reg, out, failure_reason);
synce_status->frequency_diff = MLX5_GET(msees_reg, out, frequency_diff);
return 0;
}
}
}
+static enum dpll_lock_status_error
+mlx5_dpll_lock_status_error_get(struct mlx5_dpll_synce_status *synce_status)
+{
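+ /* Only the two failure operational states carry a valid failure_reason; all others map to "none" */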
+ switch (synce_status->oper_status) {
+ case MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER:
+ fallthrough;
+ case MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING:
+ switch (synce_status->failure_reason) {
+ case MLX5_MSEES_FAILURE_REASON_PORT_DOWN:
+ return DPLL_LOCK_STATUS_ERROR_MEDIA_DOWN;
+ case MLX5_MSEES_FAILURE_REASON_TOO_HIGH_FREQUENCY_DIFF:
+ return DPLL_LOCK_STATUS_ERROR_FRACTIONAL_FREQUENCY_OFFSET_TOO_HIGH;
+ default:
+ return DPLL_LOCK_STATUS_ERROR_UNDEFINED;
+ }
+ default:
+ return DPLL_LOCK_STATUS_ERROR_NONE;
+ }
+}
+
static enum dpll_pin_state
mlx5_dpll_pin_state_get(struct mlx5_dpll_synce_status *synce_status)
{
return 0;
}
-static int mlx5_dpll_device_lock_status_get(const struct dpll_device *dpll,
- void *priv,
- enum dpll_lock_status *status,
- struct netlink_ext_ack *extack)
+static int
+mlx5_dpll_device_lock_status_get(const struct dpll_device *dpll, void *priv,
+ enum dpll_lock_status *status,
+ enum dpll_lock_status_error *status_error,
+ struct netlink_ext_ack *extack)
{
struct mlx5_dpll_synce_status synce_status;
struct mlx5_dpll *mdpll = priv;
if (err)
return err;
*status = mlx5_dpll_lock_status_get(&synce_status);
+ *status_error = mlx5_dpll_lock_status_error_get(&synce_status);
return 0;
}
struct mlx5_dpll *mdpll = auxiliary_get_drvdata(adev);
struct mlx5_core_dev *mdev = mdpll->mdev;
- cancel_delayed_work(&mdpll->work);
+ cancel_delayed_work_sync(&mdpll->work);
mlx5_dpll_mdev_netdev_untrack(mdpll, mdev);
destroy_workqueue(mdpll->wq);
dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin,
#include <linux/spinlock.h>
#include <linux/reset.h>
#include <linux/math64.h>
+#include <net/ip.h>
#include "ravb.h"
NETIF_MSG_RX_ERR | \
NETIF_MSG_TX_ERR)
-static const char *ravb_rx_irqs[NUM_RX_QUEUE] = {
- "ch0", /* RAVB_BE */
- "ch1", /* RAVB_NC */
-};
-
-static const char *ravb_tx_irqs[NUM_TX_QUEUE] = {
- "ch18", /* RAVB_BE */
- "ch19", /* RAVB_NC */
-};
-
void ravb_modify(struct net_device *ndev, enum ravb_reg reg, u32 clear,
u32 set)
{
struct ravb_private *priv = netdev_priv(ndev);
switch (priv->speed) {
- case 10: /* 10BASE */
+ case 10: /* 10BASE */
ravb_write(ndev, GBETH_GECMR_SPEED_10, GECMR);
break;
- case 100: /* 100BASE */
+ case 100: /* 100BASE */
ravb_write(ndev, GBETH_GECMR_SPEED_100, GECMR);
break;
- case 1000: /* 1000BASE */
+ case 1000: /* 1000BASE */
ravb_write(ndev, GBETH_GECMR_SPEED_1000, GECMR);
break;
}
return -ENOMEM;
}
+static void ravb_csum_init_gbeth(struct net_device *ndev)
+{
+ bool tx_enable = ndev->features & NETIF_F_HW_CSUM;
+ bool rx_enable = ndev->features & NETIF_F_RXCSUM;
+
+ if (!(tx_enable || rx_enable))
+ goto done;
+
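+ /* Stop the TX/RX checksum engines and wait for them to go idle before programming CSR1/CSR2 */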
+ ravb_write(ndev, 0, CSR0);
+ if (ravb_wait(ndev, CSR0, CSR0_TPE | CSR0_RPE, 0)) {
+ netdev_err(ndev, "Timeout enabling hardware checksum\n");
+
+ if (tx_enable)
+ ndev->features &= ~NETIF_F_HW_CSUM;
+
+ if (rx_enable)
+ ndev->features &= ~NETIF_F_RXCSUM;
+ } else {
+ if (tx_enable)
+ ravb_write(ndev, CSR1_TIP4 | CSR1_TTCP4 | CSR1_TUDP4, CSR1);
+
+ if (rx_enable)
+ ravb_write(ndev, CSR2_RIP4 | CSR2_RTCP4 | CSR2_RUDP4 | CSR2_RICMP4,
+ CSR2);
+ }
+
+done:
+ ravb_write(ndev, CSR0_TPE | CSR0_RPE, CSR0);
+}
+
static void ravb_emac_init_gbeth(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
/* E-MAC status register clear */
ravb_write(ndev, ECSR_ICD | ECSR_LCHNG | ECSR_PFRI, ECSR);
- ravb_write(ndev, CSR0_TPE | CSR0_RPE, CSR0);
+
+ ravb_csum_init_gbeth(ndev);
/* E-MAC interrupt enable register */
ravb_write(ndev, ECSIPR_ICDIP, ECSIPR);
}
}
+static void ravb_rx_csum_gbeth(struct sk_buff *skb)
+{
+ __wsum csum_ip_hdr, csum_proto;
+ u8 *hw_csum;
+
+ /* The hardware checksum status is contained in sizeof(__sum16) * 2 = 4
+ * bytes appended to the packet data. The first 2 bytes are the IP header
+ * checksum and the last 2 bytes are the protocol checksum.
+ */
+ if (unlikely(skb->len < sizeof(__sum16) * 2))
+ return;
+
+ hw_csum = skb_tail_pointer(skb) - sizeof(__sum16);
+ csum_proto = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+
+ hw_csum -= sizeof(__sum16);
+ csum_ip_hdr = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+ skb_trim(skb, skb->len - 2 * sizeof(__sum16));
+
+ /* TODO: IPV6 Rx checksum */
+ if (skb->protocol == htons(ETH_P_IP) && !csum_ip_hdr && !csum_proto)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
+
static void ravb_rx_csum(struct sk_buff *skb)
{
u8 *hw_csum;
struct ravb_rx_desc *desc;
struct sk_buff *skb;
dma_addr_t dma_addr;
+ int rx_packets = 0;
u8 desc_status;
- int boguscnt;
u16 pkt_len;
u8 die_dt;
int entry;
int limit;
+ int i;
entry = priv->cur_rx[q] % priv->num_rx_ring[q];
- boguscnt = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
+ limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
stats = &priv->stats[q];
- boguscnt = min(boguscnt, *quota);
- limit = boguscnt;
desc = &priv->gbeth_rx_ring[entry];
- while (desc->die_dt != DT_FEMPTY) {
+ for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) {
/* Descriptor type must be checked before all other reads */
dma_rmb();
desc_status = desc->msc;
pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
- if (--boguscnt < 0)
- break;
-
/* We use 0-byte descriptors to mark the DMA mapping errors */
if (!pkt_len)
continue;
skb = ravb_get_skb_gbeth(ndev, entry, desc);
skb_put(skb, pkt_len);
skb->protocol = eth_type_trans(skb, ndev);
+ if (ndev->features & NETIF_F_RXCSUM)
+ ravb_rx_csum_gbeth(skb);
napi_gro_receive(&priv->napi[q], skb);
- stats->rx_packets++;
+ rx_packets++;
stats->rx_bytes += pkt_len;
break;
case DT_FSTART:
dev_kfree_skb(skb);
priv->rx_1st_skb->protocol =
eth_type_trans(priv->rx_1st_skb, ndev);
+ if (ndev->features & NETIF_F_RXCSUM)
+ ravb_rx_csum_gbeth(skb);
napi_gro_receive(&priv->napi[q],
priv->rx_1st_skb);
- stats->rx_packets++;
+ rx_packets++;
stats->rx_bytes += pkt_len;
break;
}
desc->die_dt = DT_FEMPTY;
}
- *quota -= limit - (++boguscnt);
-
- return boguscnt <= 0;
+ stats->rx_packets += rx_packets;
+ *quota -= rx_packets;
+ return *quota == 0;
}
/* Packet receive function for Ethernet AVB */
{
struct net_device *ndev = dev_id;
struct ravb_private *priv = netdev_priv(ndev);
+ struct device *dev = &priv->pdev->dev;
+ irqreturn_t result = IRQ_HANDLED;
+
+ pm_runtime_get_noresume(dev);
+
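+ /* The interrupt may fire while the controller is runtime-suspended; report IRQ_NONE without touching the hardware */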
+ if (unlikely(!pm_runtime_active(dev))) {
+ result = IRQ_NONE;
+ goto out_rpm_put;
+ }
spin_lock(&priv->lock);
ravb_emac_interrupt_unlocked(ndev);
spin_unlock(&priv->lock);
- return IRQ_HANDLED;
+
+out_rpm_put:
+ pm_runtime_put_noidle(dev);
+ return result;
}
/* Error interrupt handler */
struct net_device *ndev = dev_id;
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
+ struct device *dev = &priv->pdev->dev;
irqreturn_t result = IRQ_NONE;
u32 iss;
+ pm_runtime_get_noresume(dev);
+
+ if (unlikely(!pm_runtime_active(dev)))
+ goto out_rpm_put;
+
spin_lock(&priv->lock);
/* Get interrupt status */
iss = ravb_read(ndev, ISS);
}
spin_unlock(&priv->lock);
+
+out_rpm_put:
+ pm_runtime_put_noidle(dev);
return result;
}
{
struct net_device *ndev = dev_id;
struct ravb_private *priv = netdev_priv(ndev);
+ struct device *dev = &priv->pdev->dev;
irqreturn_t result = IRQ_NONE;
u32 iss;
+ pm_runtime_get_noresume(dev);
+
+ if (unlikely(!pm_runtime_active(dev)))
+ goto out_rpm_put;
+
spin_lock(&priv->lock);
/* Get interrupt status */
iss = ravb_read(ndev, ISS);
}
spin_unlock(&priv->lock);
+
+out_rpm_put:
+ pm_runtime_put_noidle(dev);
return result;
}
{
struct net_device *ndev = dev_id;
struct ravb_private *priv = netdev_priv(ndev);
+ struct device *dev = &priv->pdev->dev;
irqreturn_t result = IRQ_NONE;
+ pm_runtime_get_noresume(dev);
+
+ if (unlikely(!pm_runtime_active(dev)))
+ goto out_rpm_put;
+
spin_lock(&priv->lock);
/* Network control/Best effort queue RX/TX */
result = IRQ_HANDLED;
spin_unlock(&priv->lock);
+
+out_rpm_put:
+ pm_runtime_put_noidle(dev);
return result;
}
struct net_device *ndev = napi->dev;
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
- bool gptp = info->gptp || info->ccc_gac;
- struct ravb_rx_desc *desc;
unsigned long flags;
int q = napi - priv->napi;
int mask = BIT(q);
int quota = budget;
- unsigned int entry;
- if (!gptp) {
- entry = priv->cur_rx[q] % priv->num_rx_ring[q];
- desc = &priv->gbeth_rx_ring[entry];
- }
/* Processing RX Descriptor Ring */
/* Clear RX interrupt */
ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
- if (gptp || desc->die_dt != DT_FEMPTY) {
- if (ravb_rx(ndev, "a, q))
- goto out;
- }
+ if (ravb_rx(ndev, "a, q))
+ goto out;
/* Processing TX Descriptor Ring */
spin_lock_irqsave(&priv->lock, flags);
.set_wol = ravb_set_wol,
};
-static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler,
- struct net_device *ndev, struct device *dev,
- const char *ch)
+static int ravb_set_config_mode(struct net_device *ndev)
{
- char *name;
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
int error;
- name = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", ndev->name, ch);
- if (!name)
- return -ENOMEM;
- error = request_irq(irq, handler, 0, name, ndev);
- if (error)
- netdev_err(ndev, "cannot request IRQ %s\n", name);
+ if (info->gptp) {
+ error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
+ if (error)
+ return error;
+ /* Set CSEL value */
+ ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
+ } else if (info->ccc_gac) {
+ error = ravb_set_opmode(ndev, CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB);
+ } else {
+ error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
+ }
return error;
}
+static void ravb_set_gti(struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+
+ if (!(info->gptp || info->ccc_gac))
+ return;
+
+ ravb_write(ndev, priv->gti_tiv, GTI);
+
+ /* Request GTI loading */
+ ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
+}
+
+static int ravb_compute_gti(struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+ struct device *dev = ndev->dev.parent;
+ unsigned long rate;
+ u64 inc;
+
+ if (!(info->gptp || info->ccc_gac))
+ return 0;
+
+ if (info->gptp_ref_clk)
+ rate = clk_get_rate(priv->gptp_clk);
+ else
+ rate = clk_get_rate(priv->clk);
+ if (!rate)
+ return -EINVAL;
+
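+ /* Timer increment: nanoseconds per clock cycle, as a 20-bit fixed-point fraction */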
+ inc = div64_ul(1000000000ULL << 20, rate);
+
+ if (inc < GTI_TIV_MIN || inc > GTI_TIV_MAX) {
+ dev_err(dev, "gti.tiv increment 0x%llx is outside the range 0x%x - 0x%x\n",
+ inc, GTI_TIV_MIN, GTI_TIV_MAX);
+ return -EINVAL;
+ }
+ priv->gti_tiv = inc;
+
+ return 0;
+}
+
+/* Set tx and rx clock internal delay modes */
+static void ravb_parse_delay_mode(struct device_node *np, struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ bool explicit_delay = false;
+ u32 delay;
+
+ if (!priv->info->internal_delay)
+ return;
+
+ if (!of_property_read_u32(np, "rx-internal-delay-ps", &delay)) {
+ /* Valid values are 0 and 1800, according to DT bindings */
+ priv->rxcidm = !!delay;
+ explicit_delay = true;
+ }
+ if (!of_property_read_u32(np, "tx-internal-delay-ps", &delay)) {
+ /* Valid values are 0 and 2000, according to DT bindings */
+ priv->txcidm = !!delay;
+ explicit_delay = true;
+ }
+
+ if (explicit_delay)
+ return;
+
+ /* Fall back to legacy rgmii-*id behavior */
+ if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ priv->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID) {
+ priv->rxcidm = 1;
+ priv->rgmii_override = 1;
+ }
+
+ if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) {
+ priv->txcidm = 1;
+ priv->rgmii_override = 1;
+ }
+}
+
+static void ravb_set_delay_mode(struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ u32 set = 0;
+
+ if (!priv->info->internal_delay)
+ return;
+
+ if (priv->rxcidm)
+ set |= APSR_RDM;
+ if (priv->txcidm)
+ set |= APSR_TDM;
+ ravb_modify(ndev, APSR, APSR_RDM | APSR_TDM, set);
+}
+
/* Network device open function for Ethernet AVB */
static int ravb_open(struct net_device *ndev)
{
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
- struct platform_device *pdev = priv->pdev;
- struct device *dev = &pdev->dev;
int error;
napi_enable(&priv->napi[RAVB_BE]);
if (info->nc_queues)
napi_enable(&priv->napi[RAVB_NC]);
- if (!info->multi_irqs) {
- error = request_irq(ndev->irq, ravb_interrupt, IRQF_SHARED,
- ndev->name, ndev);
- if (error) {
- netdev_err(ndev, "cannot request IRQ\n");
- goto out_napi_off;
- }
- } else {
- error = ravb_hook_irq(ndev->irq, ravb_multi_interrupt, ndev,
- dev, "ch22:multi");
- if (error)
- goto out_napi_off;
- error = ravb_hook_irq(priv->emac_irq, ravb_emac_interrupt, ndev,
- dev, "ch24:emac");
- if (error)
- goto out_free_irq;
- error = ravb_hook_irq(priv->rx_irqs[RAVB_BE], ravb_be_interrupt,
- ndev, dev, "ch0:rx_be");
- if (error)
- goto out_free_irq_emac;
- error = ravb_hook_irq(priv->tx_irqs[RAVB_BE], ravb_be_interrupt,
- ndev, dev, "ch18:tx_be");
- if (error)
- goto out_free_irq_be_rx;
- error = ravb_hook_irq(priv->rx_irqs[RAVB_NC], ravb_nc_interrupt,
- ndev, dev, "ch1:rx_nc");
- if (error)
- goto out_free_irq_be_tx;
- error = ravb_hook_irq(priv->tx_irqs[RAVB_NC], ravb_nc_interrupt,
- ndev, dev, "ch19:tx_nc");
- if (error)
- goto out_free_irq_nc_rx;
-
- if (info->err_mgmt_irqs) {
- error = ravb_hook_irq(priv->erra_irq, ravb_multi_interrupt,
- ndev, dev, "err_a");
- if (error)
- goto out_free_irq_nc_tx;
- error = ravb_hook_irq(priv->mgmta_irq, ravb_multi_interrupt,
- ndev, dev, "mgmt_a");
- if (error)
- goto out_free_irq_erra;
- }
- }
+ /* Set AVB config mode */
+ error = ravb_set_config_mode(ndev);
+ if (error)
+ goto out_napi_off;
+
+ ravb_set_delay_mode(ndev);
+ ravb_write(ndev, priv->desc_bat_dma, DBAT);
/* Device init */
error = ravb_dmac_init(ndev);
if (error)
- goto out_free_irq_mgmta;
+ goto out_set_reset;
+
ravb_emac_init(ndev);
+ ravb_set_gti(ndev);
+
/* Initialise PTP Clock driver */
- if (info->gptp)
+ if (info->gptp || info->ccc_gac)
ravb_ptp_init(ndev, priv->pdev);
/* PHY control start */
out_ptp_stop:
/* Stop PTP Clock driver */
- if (info->gptp)
+ if (info->gptp || info->ccc_gac)
ravb_ptp_stop(ndev);
ravb_stop_dma(ndev);
-out_free_irq_mgmta:
- if (!info->multi_irqs)
- goto out_free_irq;
- if (info->err_mgmt_irqs)
- free_irq(priv->mgmta_irq, ndev);
-out_free_irq_erra:
- if (info->err_mgmt_irqs)
- free_irq(priv->erra_irq, ndev);
-out_free_irq_nc_tx:
- free_irq(priv->tx_irqs[RAVB_NC], ndev);
-out_free_irq_nc_rx:
- free_irq(priv->rx_irqs[RAVB_NC], ndev);
-out_free_irq_be_tx:
- free_irq(priv->tx_irqs[RAVB_BE], ndev);
-out_free_irq_be_rx:
- free_irq(priv->rx_irqs[RAVB_BE], ndev);
-out_free_irq_emac:
- free_irq(priv->emac_irq, ndev);
-out_free_irq:
- free_irq(ndev->irq, ndev);
+out_set_reset:
+ ravb_set_opmode(ndev, CCC_OPC_RESET);
out_napi_off:
if (info->nc_queues)
napi_disable(&priv->napi[RAVB_NC]);
rtnl_unlock();
}
+static bool ravb_can_tx_csum_gbeth(struct sk_buff *skb)
+{
+ struct iphdr *ip = ip_hdr(skb);
+
+ /* TODO: Need to add support for VLAN tag 802.1Q */
+ if (skb_vlan_tag_present(skb))
+ return false;
+
+ /* TODO: Need to add hardware checksum for IPv6 */
+ if (skb->protocol != htons(ETH_P_IP))
+ return false;
+
+ switch (ip->protocol) {
+ case IPPROTO_TCP:
+ break;
+ case IPPROTO_UDP:
+ /* If the checksum value in the UDP header field is 0, the TOE does
+ * not calculate a checksum for the UDP part of this frame, as it is
+ * an optional function per the standards.
+ */
+ if (udp_hdr(skb)->check == 0)
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
/* Packet transmit function for Ethernet AVB */
static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
u32 entry;
u32 len;
+ if (skb->ip_summed == CHECKSUM_PARTIAL && !ravb_can_tx_csum_gbeth(skb))
+ skb_checksum_help(skb);
+
spin_lock_irqsave(&priv->lock, flags);
if (priv->cur_tx[q] - priv->dirty_tx[q] > (priv->num_tx_ring[q] - 1) *
num_tx_desc) {
ravb_write(ndev, 0, TIC);
/* Stop PTP Clock driver */
- if (info->gptp)
+ if (info->gptp || info->ccc_gac)
ravb_ptp_stop(ndev);
/* Set the config mode to stop the AVB-DMAC's processes */
cancel_work_sync(&priv->work);
- if (info->multi_irqs) {
- free_irq(priv->tx_irqs[RAVB_NC], ndev);
- free_irq(priv->rx_irqs[RAVB_NC], ndev);
- free_irq(priv->tx_irqs[RAVB_BE], ndev);
- free_irq(priv->rx_irqs[RAVB_BE], ndev);
- free_irq(priv->emac_irq, ndev);
- if (info->err_mgmt_irqs) {
- free_irq(priv->erra_irq, ndev);
- free_irq(priv->mgmta_irq, ndev);
- }
- }
- free_irq(ndev->irq, ndev);
-
if (info->nc_queues)
napi_disable(&priv->napi[RAVB_NC]);
napi_disable(&priv->napi[RAVB_BE]);
if (info->nc_queues)
ravb_ring_free(ndev, RAVB_NC);
- return 0;
+ /* Set reset mode. */
+ return ravb_set_opmode(ndev, CCC_OPC_RESET);
}
static int ravb_hwtstamp_get(struct net_device *ndev, struct ifreq *req)
spin_unlock_irqrestore(&priv->lock, flags);
}
+static int ravb_endisable_csum_gbeth(struct net_device *ndev, enum ravb_reg reg,
+ u32 val, u32 mask)
+{
+ u32 csr0 = CSR0_TPE | CSR0_RPE;
+ int ret;
+
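+ /* Disable the affected engine via CSR0, update its config register, then re-enable both engines */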
+ ravb_write(ndev, csr0 & ~mask, CSR0);
+ ret = ravb_wait(ndev, CSR0, mask, 0);
+ if (!ret)
+ ravb_write(ndev, val, reg);
+
+ ravb_write(ndev, csr0, CSR0);
+
+ return ret;
+}
+
static int ravb_set_features_gbeth(struct net_device *ndev,
netdev_features_t features)
{
- /* Place holder */
- return 0;
+ netdev_features_t changed = ndev->features ^ features;
+ struct ravb_private *priv = netdev_priv(ndev);
+ unsigned long flags;
+ int ret = 0;
+ u32 val;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (changed & NETIF_F_RXCSUM) {
+ if (features & NETIF_F_RXCSUM)
+ val = CSR2_RIP4 | CSR2_RTCP4 | CSR2_RUDP4 | CSR2_RICMP4;
+ else
+ val = 0;
+
+ ret = ravb_endisable_csum_gbeth(ndev, CSR2, val, CSR0_RPE);
+ if (ret)
+ goto done;
+ }
+
+ if (changed & NETIF_F_HW_CSUM) {
+ if (features & NETIF_F_HW_CSUM)
+ val = CSR1_TIP4 | CSR1_TTCP4 | CSR1_TUDP4;
+ else
+ val = 0;
+
+ ret = ravb_endisable_csum_gbeth(ndev, CSR1, val, CSR0_TPE);
+ if (ret)
+ goto done;
+ }
+
+ ndev->features = features;
+done:
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return ret;
}
static int ravb_set_features_rcar(struct net_device *ndev,
.emac_init = ravb_emac_init_gbeth,
.gstrings_stats = ravb_gstrings_stats_gbeth,
.gstrings_size = sizeof(ravb_gstrings_stats_gbeth),
+ .net_hw_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM,
+ .net_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM,
.stats_len = ARRAY_SIZE(ravb_gstrings_stats_gbeth),
.max_rx_len = ALIGN(GBETH_RX_BUFF_MAX, RAVB_ALIGN),
.tccr_mask = TCCR_TSRQ0,
};
MODULE_DEVICE_TABLE(of, ravb_match_table);
-static int ravb_set_gti(struct net_device *ndev)
+static int ravb_setup_irq(struct ravb_private *priv, const char *irq_name,
+ const char *ch, int *irq, irq_handler_t handler)
{
- struct ravb_private *priv = netdev_priv(ndev);
- const struct ravb_hw_info *info = priv->info;
- struct device *dev = ndev->dev.parent;
- unsigned long rate;
- uint64_t inc;
-
- if (info->gptp_ref_clk)
- rate = clk_get_rate(priv->gptp_clk);
- else
- rate = clk_get_rate(priv->clk);
- if (!rate)
- return -EINVAL;
+ struct platform_device *pdev = priv->pdev;
+ struct net_device *ndev = priv->ndev;
+ struct device *dev = &pdev->dev;
+ const char *dev_name;
+ unsigned long flags;
+ int error;
- inc = div64_ul(1000000000ULL << 20, rate);
+ if (irq_name) {
+ dev_name = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", ndev->name, ch);
+ if (!dev_name)
+ return -ENOMEM;
- if (inc < GTI_TIV_MIN || inc > GTI_TIV_MAX) {
- dev_err(dev, "gti.tiv increment 0x%llx is outside the range 0x%x - 0x%x\n",
- inc, GTI_TIV_MIN, GTI_TIV_MAX);
- return -EINVAL;
+ *irq = platform_get_irq_byname(pdev, irq_name);
+ flags = 0;
+ } else {
+ dev_name = ndev->name;
+ *irq = platform_get_irq(pdev, 0);
+ flags = IRQF_SHARED;
}
+ if (*irq < 0)
+ return *irq;
- ravb_write(ndev, inc, GTI);
+ error = devm_request_irq(dev, *irq, handler, flags, dev_name, ndev);
+ if (error)
+ netdev_err(ndev, "cannot request IRQ %s\n", dev_name);
- return 0;
+ return error;
}
-static int ravb_set_config_mode(struct net_device *ndev)
+static int ravb_setup_irqs(struct ravb_private *priv)
{
- struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
- int error;
+ struct net_device *ndev = priv->ndev;
+ const char *irq_name, *emac_irq_name;
+ int error, irq;
- if (info->gptp) {
- error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
- if (error)
- return error;
- /* Set CSEL value */
- ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
- } else if (info->ccc_gac) {
- error = ravb_set_opmode(ndev, CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB);
+ if (!info->multi_irqs)
+ return ravb_setup_irq(priv, NULL, NULL, &ndev->irq, ravb_interrupt);
+
+ if (info->err_mgmt_irqs) {
+ irq_name = "dia";
+ emac_irq_name = "line3";
} else {
- error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
+ irq_name = "ch22";
+ emac_irq_name = "ch24";
}
- return error;
-}
-
-/* Set tx and rx clock internal delay modes */
-static void ravb_parse_delay_mode(struct device_node *np, struct net_device *ndev)
-{
- struct ravb_private *priv = netdev_priv(ndev);
- bool explicit_delay = false;
- u32 delay;
+ error = ravb_setup_irq(priv, irq_name, "ch22:multi", &ndev->irq, ravb_multi_interrupt);
+ if (error)
+ return error;
- if (!of_property_read_u32(np, "rx-internal-delay-ps", &delay)) {
- /* Valid values are 0 and 1800, according to DT bindings */
- priv->rxcidm = !!delay;
- explicit_delay = true;
- }
- if (!of_property_read_u32(np, "tx-internal-delay-ps", &delay)) {
- /* Valid values are 0 and 2000, according to DT bindings */
- priv->txcidm = !!delay;
- explicit_delay = true;
- }
+ error = ravb_setup_irq(priv, emac_irq_name, "ch24:emac", &priv->emac_irq,
+ ravb_emac_interrupt);
+ if (error)
+ return error;
- if (explicit_delay)
- return;
+ if (info->err_mgmt_irqs) {
+ error = ravb_setup_irq(priv, "err_a", "err_a", &irq, ravb_multi_interrupt);
+ if (error)
+ return error;
- /* Fall back to legacy rgmii-*id behavior */
- if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
- priv->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID) {
- priv->rxcidm = 1;
- priv->rgmii_override = 1;
+ error = ravb_setup_irq(priv, "mgmt_a", "mgmt_a", &irq, ravb_multi_interrupt);
+ if (error)
+ return error;
}
- if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
- priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) {
- priv->txcidm = 1;
- priv->rgmii_override = 1;
- }
-}
+ error = ravb_setup_irq(priv, "ch0", "ch0:rx_be", &irq, ravb_be_interrupt);
+ if (error)
+ return error;
-static void ravb_set_delay_mode(struct net_device *ndev)
-{
- struct ravb_private *priv = netdev_priv(ndev);
- u32 set = 0;
+ error = ravb_setup_irq(priv, "ch1", "ch1:rx_nc", &irq, ravb_nc_interrupt);
+ if (error)
+ return error;
- if (priv->rxcidm)
- set |= APSR_RDM;
- if (priv->txcidm)
- set |= APSR_TDM;
- ravb_modify(ndev, APSR, APSR_RDM | APSR_TDM, set);
+ error = ravb_setup_irq(priv, "ch18", "ch18:tx_be", &irq, ravb_be_interrupt);
+ if (error)
+ return error;
+
+ return ravb_setup_irq(priv, "ch19", "ch19:tx_nc", &irq, ravb_nc_interrupt);
}
static int ravb_probe(struct platform_device *pdev)
struct reset_control *rstc;
struct ravb_private *priv;
struct net_device *ndev;
- int error, irq, q;
struct resource *res;
- int i;
+ int error, q;
if (!np) {
dev_err(&pdev->dev,
return -EINVAL;
}
- rstc = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
+ rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
if (IS_ERR(rstc))
return dev_err_probe(&pdev->dev, PTR_ERR(rstc),
"failed to get cpg reset\n");
if (error)
goto out_free_netdev;
- pm_runtime_enable(&pdev->dev);
- error = pm_runtime_resume_and_get(&pdev->dev);
- if (error < 0)
- goto out_rpm_disable;
-
- if (info->multi_irqs) {
- if (info->err_mgmt_irqs)
- irq = platform_get_irq_byname(pdev, "dia");
- else
- irq = platform_get_irq_byname(pdev, "ch22");
- } else {
- irq = platform_get_irq(pdev, 0);
- }
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- ndev->irq = irq;
-
SET_NETDEV_DEV(ndev, &pdev->dev);
priv = netdev_priv(ndev);
priv->num_rx_ring[RAVB_NC] = NC_RX_RING_SIZE;
}
+ error = ravb_setup_irqs(priv);
+ if (error)
+ goto out_reset_assert;
+
+ priv->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(priv->clk)) {
+ error = PTR_ERR(priv->clk);
+ goto out_reset_assert;
+ }
+
+ if (info->gptp_ref_clk) {
+ priv->gptp_clk = devm_clk_get(&pdev->dev, "gptp");
+ if (IS_ERR(priv->gptp_clk)) {
+ error = PTR_ERR(priv->gptp_clk);
+ goto out_reset_assert;
+ }
+ }
+
+ priv->refclk = devm_clk_get_optional(&pdev->dev, "refclk");
+ if (IS_ERR(priv->refclk)) {
+ error = PTR_ERR(priv->refclk);
+ goto out_reset_assert;
+ }
+ clk_prepare(priv->refclk);
+
+ platform_set_drvdata(pdev, ndev);
+ pm_runtime_enable(&pdev->dev);
+ error = pm_runtime_resume_and_get(&pdev->dev);
+ if (error < 0)
+ goto out_rpm_disable;
+
priv->addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(priv->addr)) {
error = PTR_ERR(priv->addr);
- goto out_release;
+ goto out_rpm_put;
}
/* The Ether-specific entries in the device structure. */
error = of_get_phy_mode(np, &priv->phy_interface);
if (error && error != -ENODEV)
- goto out_release;
+ goto out_rpm_put;
priv->no_avb_link = of_property_read_bool(np, "renesas,no-ether-link");
priv->avb_link_active_low =
of_property_read_bool(np, "renesas,ether-link-active-low");
- if (info->multi_irqs) {
- if (info->err_mgmt_irqs)
- irq = platform_get_irq_byname(pdev, "line3");
- else
- irq = platform_get_irq_byname(pdev, "ch24");
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- priv->emac_irq = irq;
- for (i = 0; i < NUM_RX_QUEUE; i++) {
- irq = platform_get_irq_byname(pdev, ravb_rx_irqs[i]);
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- priv->rx_irqs[i] = irq;
- }
- for (i = 0; i < NUM_TX_QUEUE; i++) {
- irq = platform_get_irq_byname(pdev, ravb_tx_irqs[i]);
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- priv->tx_irqs[i] = irq;
- }
-
- if (info->err_mgmt_irqs) {
- irq = platform_get_irq_byname(pdev, "err_a");
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- priv->erra_irq = irq;
-
- irq = platform_get_irq_byname(pdev, "mgmt_a");
- if (irq < 0) {
- error = irq;
- goto out_release;
- }
- priv->mgmta_irq = irq;
- }
- }
-
- priv->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(priv->clk)) {
- error = PTR_ERR(priv->clk);
- goto out_release;
- }
-
- priv->refclk = devm_clk_get_optional(&pdev->dev, "refclk");
- if (IS_ERR(priv->refclk)) {
- error = PTR_ERR(priv->refclk);
- goto out_release;
- }
- clk_prepare_enable(priv->refclk);
-
- if (info->gptp_ref_clk) {
- priv->gptp_clk = devm_clk_get(&pdev->dev, "gptp");
- if (IS_ERR(priv->gptp_clk)) {
- error = PTR_ERR(priv->gptp_clk);
- goto out_disable_refclk;
- }
- clk_prepare_enable(priv->gptp_clk);
- }
-
ndev->max_mtu = info->rx_max_buf_size - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
ndev->min_mtu = ETH_MIN_MTU;
ndev->netdev_ops = &ravb_netdev_ops;
ndev->ethtool_ops = &ravb_ethtool_ops;
- /* Set AVB config mode */
- error = ravb_set_config_mode(ndev);
+ error = ravb_compute_gti(ndev);
if (error)
- goto out_disable_gptp_clk;
-
- if (info->gptp || info->ccc_gac) {
- /* Set GTI value */
- error = ravb_set_gti(ndev);
- if (error)
- goto out_disable_gptp_clk;
+ goto out_rpm_put;
- /* Request GTI loading */
- ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
- }
-
- if (info->internal_delay) {
- ravb_parse_delay_mode(np, ndev);
- ravb_set_delay_mode(ndev);
- }
+ ravb_parse_delay_mode(np, ndev);
/* Allocate descriptor base address table */
priv->desc_bat_size = sizeof(struct ravb_desc) * DBAT_ENTRY_NUM;
"Cannot allocate desc base address table (size %d bytes)\n",
priv->desc_bat_size);
error = -ENOMEM;
- goto out_disable_gptp_clk;
+ goto out_rpm_put;
}
for (q = RAVB_BE; q < DBAT_ENTRY_NUM; q++)
priv->desc_bat[q].die_dt = DT_EOS;
- ravb_write(ndev, priv->desc_bat_dma, DBAT);
/* Initialise HW timestamp list */
INIT_LIST_HEAD(&priv->ts_skb_list);
- /* Initialise PTP Clock driver */
- if (info->ccc_gac)
- ravb_ptp_init(ndev, pdev);
-
/* Debug message level */
priv->msg_enable = RAVB_DEF_MSG_ENABLE;
+ /* Set config mode as this is needed for PHY initialization. */
+ error = ravb_set_opmode(ndev, CCC_OPC_CONFIG);
+ if (error)
+ goto out_rpm_put;
+
/* Read and set MAC address */
ravb_read_mac_address(np, ndev);
if (!is_valid_ether_addr(ndev->dev_addr)) {
error = ravb_mdio_init(priv);
if (error) {
dev_err(&pdev->dev, "failed to initialize MDIO\n");
- goto out_dma_free;
+ goto out_reset_mode;
}
+ /* Undo previous switch to config opmode. */
+ error = ravb_set_opmode(ndev, CCC_OPC_RESET);
+ if (error)
+ goto out_mdio_release;
+
netif_napi_add(ndev, &priv->napi[RAVB_BE], ravb_poll);
if (info->nc_queues)
netif_napi_add(ndev, &priv->napi[RAVB_NC], ravb_poll);
netdev_info(ndev, "Base address at %#x, %pM, IRQ %d.\n",
(u32)ndev->base_addr, ndev->dev_addr, ndev->irq);
- platform_set_drvdata(pdev, ndev);
-
return 0;
out_napi_del:
netif_napi_del(&priv->napi[RAVB_NC]);
netif_napi_del(&priv->napi[RAVB_BE]);
+out_mdio_release:
ravb_mdio_release(priv);
-out_dma_free:
+out_reset_mode:
+ ravb_set_opmode(ndev, CCC_OPC_RESET);
dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat,
priv->desc_bat_dma);
-
- /* Stop PTP Clock driver */
- if (info->ccc_gac)
- ravb_ptp_stop(ndev);
-out_disable_gptp_clk:
- clk_disable_unprepare(priv->gptp_clk);
-out_disable_refclk:
- clk_disable_unprepare(priv->refclk);
-out_release:
+out_rpm_put:
pm_runtime_put(&pdev->dev);
out_rpm_disable:
pm_runtime_disable(&pdev->dev);
+ clk_unprepare(priv->refclk);
+out_reset_assert:
reset_control_assert(rstc);
out_free_netdev:
free_netdev(ndev);
ravb_mdio_release(priv);
- /* Stop PTP Clock driver */
- if (info->ccc_gac)
- ravb_ptp_stop(ndev);
-
dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat,
priv->desc_bat_dma);
- ravb_set_opmode(ndev, CCC_OPC_RESET);
-
- clk_disable_unprepare(priv->gptp_clk);
- clk_disable_unprepare(priv->refclk);
-
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
+ clk_unprepare(priv->refclk);
reset_control_assert(priv->rstc);
free_netdev(ndev);
platform_set_drvdata(pdev, NULL);
/* Enable MagicPacket */
ravb_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
+ if (priv->info->ccc_gac)
+ ravb_ptp_stop(ndev);
+
return enable_irq_wake(priv->emac_irq);
}
{
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
+ int error;
+
+ /* Set reset mode to rearm the WoL logic. */
+ error = ravb_set_opmode(ndev, CCC_OPC_RESET);
+ if (error)
+ return error;
+
+ /* Set AVB config mode. */
+ error = ravb_set_config_mode(ndev);
+ if (error)
+ return error;
+
+ if (priv->info->ccc_gac)
+ ravb_ptp_init(ndev, priv->pdev);
if (info->nc_queues)
napi_enable(&priv->napi[RAVB_NC]);
return disable_irq_wake(priv->emac_irq);
}
-static int __maybe_unused ravb_suspend(struct device *dev)
+static int ravb_suspend(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
struct ravb_private *priv = netdev_priv(ndev);
int ret;
if (!netif_running(ndev))
- return 0;
+ goto reset_assert;
netif_device_detach(ndev);
if (priv->wol_enabled)
- ret = ravb_wol_setup(ndev);
- else
- ret = ravb_close(ndev);
+ return ravb_wol_setup(ndev);
- if (priv->info->ccc_gac)
- ravb_ptp_stop(ndev);
+ ret = ravb_close(ndev);
+ if (ret)
+ return ret;
- return ret;
+reset_assert:
+ return reset_control_assert(priv->rstc);
}
-static int __maybe_unused ravb_resume(struct device *dev)
+static int ravb_resume(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
struct ravb_private *priv = netdev_priv(ndev);
- const struct ravb_hw_info *info = priv->info;
- int ret = 0;
-
- /* If WoL is enabled set reset mode to rearm the WoL logic */
- if (priv->wol_enabled) {
- ret = ravb_set_opmode(ndev, CCC_OPC_RESET);
- if (ret)
- return ret;
- }
-
- /* All register have been reset to default values.
- * Restore all registers which where setup at probe time and
- * reopen device if it was running before system suspended.
- */
+ int ret;
- /* Set AVB config mode */
- ret = ravb_set_config_mode(ndev);
+ ret = reset_control_deassert(priv->rstc);
if (ret)
return ret;
- if (info->gptp || info->ccc_gac) {
- /* Set GTI value */
- ret = ravb_set_gti(ndev);
+ if (!netif_running(ndev))
+ return 0;
+
+ /* If WoL is enabled, restore the interface. */
+ if (priv->wol_enabled) {
+ ret = ravb_wol_restore(ndev);
if (ret)
return ret;
-
- /* Request GTI loading */
- ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
}
- if (info->internal_delay)
- ravb_set_delay_mode(ndev);
-
- /* Restore descriptor base address table */
- ravb_write(ndev, priv->desc_bat_dma, DBAT);
+ /* Reopening the interface will restore the device to the working state. */
+ ret = ravb_open(ndev);
+ if (ret < 0)
+ return ret;
- if (priv->info->ccc_gac)
- ravb_ptp_init(ndev, priv->pdev);
-
- if (netif_running(ndev)) {
- if (priv->wol_enabled) {
- ret = ravb_wol_restore(ndev);
- if (ret)
- return ret;
- }
- ret = ravb_open(ndev);
- if (ret < 0)
- return ret;
- ravb_set_rx_mode(ndev);
- netif_device_attach(ndev);
- }
+ ravb_set_rx_mode(ndev);
+ netif_device_attach(ndev);
return ret;
}
-static int __maybe_unused ravb_runtime_nop(struct device *dev)
+static int ravb_runtime_suspend(struct device *dev)
{
- /* Runtime PM callback shared between ->runtime_suspend()
- * and ->runtime_resume(). Simply returns success.
- *
- * This driver re-initializes all registers after
- * pm_runtime_get_sync() anyway so there is no need
- * to save and restore registers here.
- */
+ struct net_device *ndev = dev_get_drvdata(dev);
+ struct ravb_private *priv = netdev_priv(ndev);
+
+ clk_disable(priv->refclk);
+
return 0;
}
+static int ravb_runtime_resume(struct device *dev)
+{
+ struct net_device *ndev = dev_get_drvdata(dev);
+ struct ravb_private *priv = netdev_priv(ndev);
+
+ return clk_enable(priv->refclk);
+}
+
static const struct dev_pm_ops ravb_dev_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(ravb_suspend, ravb_resume)
- SET_RUNTIME_PM_OPS(ravb_runtime_nop, ravb_runtime_nop, NULL)
+ SYSTEM_SLEEP_PM_OPS(ravb_suspend, ravb_resume)
+ RUNTIME_PM_OPS(ravb_runtime_suspend, ravb_runtime_resume, NULL)
};
static struct platform_driver ravb_driver = {
.remove_new = ravb_remove,
.driver = {
.name = "ravb",
- .pm = &ravb_dev_pm_ops,
+ .pm = pm_ptr(&ravb_dev_pm_ops),
.of_match_table = ravb_match_table,
},
};
bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
struct xfrm_state *x);
void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
- void (*xdo_dev_state_update_curlft) (struct xfrm_state *x);
+ void (*xdo_dev_state_update_stats) (struct xfrm_state *x);
int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack);
void (*xdo_dev_policy_delete) (struct xfrm_policy *x);
void (*xdo_dev_policy_free) (struct xfrm_policy *x);
NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
};
+enum netdev_reg_state {
+ NETREG_UNINITIALIZED = 0,
+ NETREG_REGISTERED, /* completed register_netdevice */
+ NETREG_UNREGISTERING, /* called unregister_netdevice */
+ NETREG_UNREGISTERED, /* completed unregister todo */
+ NETREG_RELEASED, /* called free_netdev */
+ NETREG_DUMMY, /* dummy device for NAPI poll */
+};
+
/**
* struct net_device - The DEVICE structure.
*
/* TXRX read-mostly hotpath */
__cacheline_group_begin(net_device_read_txrx);
+ union {
+ struct pcpu_lstats __percpu *lstats;
+ struct pcpu_sw_netstats __percpu *tstats;
+ struct pcpu_dstats __percpu *dstats;
+ };
unsigned int flags;
unsigned short hard_header_len;
netdev_features_t features;
const struct tlsdev_ops *tlsdev_ops;
#endif
- unsigned char operstate;
+ unsigned int operstate;
unsigned char link_mode;
unsigned char if_port;
struct list_head link_watch_list;
- enum { NETREG_UNINITIALIZED=0,
- NETREG_REGISTERED, /* completed register_netdevice */
- NETREG_UNREGISTERING, /* called unregister_netdevice */
- NETREG_UNREGISTERED, /* completed unregister todo */
- NETREG_RELEASED, /* called free_netdev */
- NETREG_DUMMY, /* dummy device for NAPI poll */
- } reg_state:8;
+ u8 reg_state;
bool dismantle;
enum netdev_ml_priv_type ml_priv_type;
enum netdev_stat_type pcpu_stat_type:8;
- union {
- struct pcpu_lstats __percpu *lstats;
- struct pcpu_sw_netstats __percpu *tstats;
- struct pcpu_dstats __percpu *dstats;
- };
#if IS_ENABLED(CONFIG_GARP)
struct garp_port __rcu *garp_port;
int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info);
-extern rwlock_t dev_base_lock; /* Device list lock */
-
#define for_each_netdev(net, d) \
list_for_each_entry(d, &(net)->dev_base_head, dev_list)
#define for_each_netdev_reverse(net, d) \
int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev);
void netdev_freemem(struct net_device *dev);
-int init_dummy_netdev(struct net_device *dev);
+void init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
struct sk_buff *skb,
u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog);
void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb);
int netif_rx(struct sk_buff *skb);
int __netif_rx(struct sk_buff *skb);
static inline const char *netdev_reg_state(const struct net_device *dev)
{
- switch (dev->reg_state) {
+ u8 reg_state = READ_ONCE(dev->reg_state);
+
+ switch (reg_state) {
case NETREG_UNINITIALIZED: return " (uninitialized)";
case NETREG_REGISTERED: return "";
case NETREG_UNREGISTERING: return " (unregistering)";
case NETREG_DUMMY: return " (dummy)";
}
- WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state);
+ WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, reg_state);
return " (unknown)";
}
#include <linux/prandom.h>
#include <linux/once_lite.h>
#include <net/netdev_rx_queue.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
#include "dev.h"
#include "net-sysfs.h"
struct net_device *dev,
struct netlink_ext_ack *extack);
-/*
- * The @dev_base_head list is protected by @dev_base_lock and the rtnl
- * semaphore.
- *
- * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
- *
- * Writers must hold the rtnl semaphore while they loop through the
- * dev_base_head list, and hold dev_base_lock for writing when they do the
- * actual updates. This allows pure readers to access the list even
- * while a writer is preparing to update it.
- *
- * To put it another way, dev_base_lock is held for writing only to
- * protect against pure readers; the rtnl semaphore provides the
- * protection against other writers.
- *
- * See, for example usages, register_netdevice() and
- * unregister_netdevice(), which must be called with the rtnl
- * semaphore held.
- */
-DEFINE_RWLOCK(dev_base_lock);
-EXPORT_SYMBOL(dev_base_lock);
-
static DEFINE_MUTEX(ifalias_mutex);
/* protects napi_hash addition/deletion and napi_gen_id */
return -ENOMEM;
netdev_name_node_add(net, name_node);
/* The node that holds dev->name acts as a head of per-device list. */
- list_add_tail(&name_node->list, &dev->name_node->list);
+ list_add_tail_rcu(&name_node->list, &dev->name_node->list);
return 0;
}
-static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+static void netdev_name_node_alt_free(struct rcu_head *head)
{
- list_del(&name_node->list);
+ struct netdev_name_node *name_node =
+ container_of(head, struct netdev_name_node, rcu);
+
kfree(name_node->name);
netdev_name_node_free(name_node);
}
+static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+{
+ netdev_name_node_del(name_node);
+ list_del(&name_node->list);
+ call_rcu(&name_node->rcu, netdev_name_node_alt_free);
+}
+
int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
{
struct netdev_name_node *name_node;
if (name_node == dev->name_node || name_node->dev != dev)
return -EINVAL;
- netdev_name_node_del(name_node);
- synchronize_rcu();
__netdev_name_node_alt_destroy(name_node);
-
return 0;
}
{
struct netdev_name_node *name_node, *tmp;
- list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
- __netdev_name_node_alt_destroy(name_node);
+ list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) {
+ list_del(&name_node->list);
+ netdev_name_node_alt_free(&name_node->rcu);
+ }
}
/* Device list insertion */
ASSERT_RTNL();
- write_lock(&dev_base_lock);
list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
netdev_name_node_add(net, dev->name_node);
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
- write_unlock(&dev_base_lock);
netdev_for_each_altname(dev, name_node)
netdev_name_node_add(net, name_node);
/* Device list removal
* caller must respect a RCU grace period before freeing/reusing dev
*/
-static void unlist_netdevice(struct net_device *dev, bool lock)
+static void unlist_netdevice(struct net_device *dev)
{
struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
netdev_name_node_del(name_node);
/* Unlink dev from the device chain */
- if (lock)
- write_lock(&dev_base_lock);
list_del_rcu(&dev->dev_list);
netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
- if (lock)
- write_unlock(&dev_base_lock);
dev_base_seq_inc(dev_net(dev));
}
DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
+/* Page_pool has a lockless array/stack to alloc/recycle pages.
+ * PP consumers must pay attention to run APIs in the appropriate context
+ * (e.g. NAPI context).
+ */
+static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool);
+
#ifdef CONFIG_LOCKDEP
/*
* register_netdevice() inits txq->_xmit_lock and sets lockdep class
* @net: the applicable net namespace
* @name: name to find
*
- * Find an interface by name. Must be called under RTNL semaphore
- * or @dev_base_lock. If the name is found a pointer to the device
- * is returned. If the name is not found then %NULL is returned. The
+ * Find an interface by name. Must be called under RTNL semaphore.
+ * If the name is found a pointer to the device is returned.
+ * If the name is not found then %NULL is returned. The
* reference counters are not incremented so the caller must be
* careful with locks.
*/
* Search for an interface by index. Returns %NULL if the device
* is not found or a pointer to the device. The device has not
* had its reference counter increased so the caller must be careful
- * about locking. The caller must hold either the RTNL semaphore
- * or @dev_base_lock.
+ * about locking. The caller must hold the RTNL semaphore.
*/
struct net_device *__dev_get_by_index(struct net *net, int ifindex)
dev->flags & IFF_UP ? " (while UP)" : "");
old_assign_type = dev->name_assign_type;
- dev->name_assign_type = NET_NAME_RENAMED;
+ WRITE_ONCE(dev->name_assign_type, NET_NAME_RENAMED);
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
- dev->name_assign_type = old_assign_type;
+ WRITE_ONCE(dev->name_assign_type, old_assign_type);
up_write(&devnet_rename_sem);
return ret;
}
netdev_adjacent_rename_links(dev, oldname);
- write_lock(&dev_base_lock);
netdev_name_node_del(dev->name_node);
- write_unlock(&dev_base_lock);
- synchronize_rcu();
+ synchronize_net();
- write_lock(&dev_base_lock);
netdev_name_node_add(net, dev->name_node);
- write_unlock(&dev_base_lock);
ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
ret = notifier_to_errno(ret);
down_write(&devnet_rename_sem);
memcpy(dev->name, oldname, IFNAMSIZ);
memcpy(oldname, newname, IFNAMSIZ);
- dev->name_assign_type = old_assign_type;
+ WRITE_ONCE(dev->name_assign_type, old_assign_type);
old_assign_type = NET_NAME_RENAMED;
goto rollback;
} else {
xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
skb_headlen(skb) + mac_len, true);
+ if (skb_is_nonlinear(skb)) {
+ skb_shinfo(skb)->xdp_frags_size = skb->data_len;
+ xdp_buff_set_frags_flag(xdp);
+ } else {
+ xdp_buff_clear_frags_flag(xdp);
+ }
orig_data_end = xdp->data_end;
orig_data = xdp->data;
skb->len += off; /* positive on grow, negative on shrink */
}
+	/* XDP frag metadata (e.g. nr_frags) is updated by eBPF helpers
+	 * (e.g. bpf_xdp_adjust_tail), so we need to update data_len here.
+ */
+ if (xdp_buff_has_frags(xdp))
+ skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+ else
+ skb->data_len = 0;
+
/* check if XDP changed eth hdr such SKB needs update */
eth = (struct ethhdr *)xdp->data;
if ((orig_eth_type != eth->h_proto) ||
return act;
}
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+static int
+netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog)
+{
+ struct sk_buff *skb = *pskb;
+ int err, hroom, troom;
+
+ if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog))
+ return 0;
+
+	/* If we have to go down this path and also linearize,
+	 * let's do the pskb_expand_head() work just once here.
+ */
+ hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ troom = skb->tail + skb->data_len - skb->end;
+ err = pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC);
+ if (err)
+ return err;
+
+ return skb_linearize(skb);
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff **pskb,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
- u32 act = XDP_DROP;
+ struct sk_buff *skb = *pskb;
+ u32 mac_len, act = XDP_DROP;
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
if (skb_is_redirected(skb))
return XDP_PASS;
- /* XDP packets must be linear and must have sufficient headroom
- * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
- * native XDP provides, thus we need to do it here as well.
+ /* XDP packets must have sufficient headroom of XDP_PACKET_HEADROOM
+ * bytes. This is the guarantee that also native XDP provides,
+ * thus we need to do it here as well.
*/
+ mac_len = skb->data - skb_mac_header(skb);
+ __skb_push(skb, mac_len);
+
if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
- int troom = skb->tail + skb->data_len - skb->end;
-
- /* In case we have to go down the path and also linearize,
- * then lets do the pskb_expand_head() work just once here.
- */
- if (pskb_expand_head(skb,
- hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
- troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
- goto do_drop;
- if (skb_linearize(skb))
+ if (netif_skb_check_for_xdp(pskb, xdp_prog))
goto do_drop;
}
- act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
+ __skb_pull(*pskb, mac_len);
+
+ act = bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog);
switch (act) {
case XDP_REDIRECT:
case XDP_TX:
case XDP_PASS:
break;
default:
- bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act);
+ bpf_warn_invalid_xdp_action((*pskb)->dev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
- trace_xdp_exception(skb->dev, xdp_prog, act);
+ trace_xdp_exception((*pskb)->dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
do_drop:
- kfree_skb(skb);
+ kfree_skb(*pskb);
break;
}
static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
-int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
{
if (xdp_prog) {
struct xdp_buff xdp;
u32 act;
int err;
- act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
+ act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
if (act != XDP_PASS) {
switch (act) {
case XDP_REDIRECT:
- err = xdp_do_generic_redirect(skb->dev, skb,
+ err = xdp_do_generic_redirect((*pskb)->dev, *pskb,
&xdp, xdp_prog);
if (err)
goto out_redir;
break;
case XDP_TX:
- generic_xdp_tx(skb, xdp_prog);
+ generic_xdp_tx(*pskb, xdp_prog);
break;
}
return XDP_DROP;
}
return XDP_PASS;
out_redir:
- kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
+ kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
EXPORT_SYMBOL_GPL(do_xdp_generic);
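
do_xdp_generic() and netif_receive_generic_xdp() now take struct sk_buff ** because the checks above (skb_cow_data_for_xdp(), pskb_expand_head(), linearization) may leave the caller holding a different skb than it passed in. A small hedged C sketch of the same calling convention, with a hypothetical grow_buffer() standing in for the reallocation step:

#include <stdio.h>
#include <stdlib.h>

struct buffer {
	size_t len;
	char data[];
};

/* May replace *pbuf with a new allocation, so the caller hands in a
 * struct buffer ** and must keep using the (possibly new) pointer.
 */
static int grow_buffer(struct buffer **pbuf, size_t new_len)
{
	struct buffer *nbuf = realloc(*pbuf, sizeof(*nbuf) + new_len);

	if (!nbuf)
		return -1;
	nbuf->len = new_len;
	*pbuf = nbuf;
	return 0;
}

int main(void)
{
	struct buffer *buf = calloc(1, sizeof(*buf) + 8);

	if (!buf)
		return 1;
	buf->len = 8;
	if (grow_buffer(&buf, 64) == 0)
		printf("buffer now holds %zu bytes\n", buf->len);
	free(buf);
	return 0;
}
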
int ret2;
migrate_disable();
- ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+ ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
+ &skb);
migrate_enable();
if (ret2 != XDP_PASS) {
clear_bit(NAPI_STATE_SCHED, &napi->state);
}
-static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
- u16 budget)
+enum {
+ NAPI_F_PREFER_BUSY_POLL = 1,
+ NAPI_F_END_ON_RESCHED = 2,
+};
+
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
+ unsigned flags, u16 budget)
{
bool skip_schedule = false;
unsigned long timeout;
local_bh_disable();
- if (prefer_busy_poll) {
+ if (flags & NAPI_F_PREFER_BUSY_POLL) {
napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
timeout = READ_ONCE(napi->dev->gro_flush_timeout);
if (napi->defer_hard_irqs_count && timeout) {
local_bh_enable();
}
-void napi_busy_loop(unsigned int napi_id,
- bool (*loop_end)(void *, unsigned long),
- void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+static void __napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, unsigned flags, u16 budget)
{
unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
void *have_poll_lock = NULL;
struct napi_struct *napi;
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
restart:
napi_poll = NULL;
- rcu_read_lock();
-
napi = napi_by_id(napi_id);
if (!napi)
- goto out;
+ return;
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
*/
if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
NAPIF_STATE_IN_BUSY_POLL)) {
- if (prefer_busy_poll)
+ if (flags & NAPI_F_PREFER_BUSY_POLL)
set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
goto count;
}
if (cmpxchg(&napi->state, val,
val | NAPIF_STATE_IN_BUSY_POLL |
NAPIF_STATE_SCHED) != val) {
- if (prefer_busy_poll)
+ if (flags & NAPI_F_PREFER_BUSY_POLL)
set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
goto count;
}
break;
if (unlikely(need_resched())) {
+ if (flags & NAPI_F_END_ON_RESCHED)
+ break;
if (napi_poll)
- busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
+ busy_poll_stop(napi, have_poll_lock, flags, budget);
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
rcu_read_unlock();
cond_resched();
+ rcu_read_lock();
if (loop_end(loop_end_arg, start_time))
return;
goto restart;
cpu_relax();
}
if (napi_poll)
- busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
+ busy_poll_stop(napi, have_poll_lock, flags, budget);
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
-out:
+}
+
+void napi_busy_loop_rcu(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+{
+ unsigned flags = NAPI_F_END_ON_RESCHED;
+
+ if (prefer_busy_poll)
+ flags |= NAPI_F_PREFER_BUSY_POLL;
+
+ __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
+}
+
+void napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+{
+ unsigned flags = prefer_busy_poll ? NAPI_F_PREFER_BUSY_POLL : 0;
+
+ rcu_read_lock();
+ __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
rcu_read_unlock();
}
EXPORT_SYMBOL(napi_busy_loop);
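
The busy-poll rework replaces the bool prefer_busy_poll argument of the inner loop with a flags word so that napi_busy_loop_rcu() can also request NAPI_F_END_ON_RESCHED, while the exported entry points keep their old signatures. A hedged userspace sketch of that bool-wrappers-over-flags pattern (all names below are illustrative):

#include <stdbool.h>
#include <stdio.h>

enum {
	POLL_F_PREFER_BUSY = 1,
	POLL_F_END_ON_RESCHED = 2,
};

/* Internal worker: one flags word instead of a growing list of bools. */
static void poll_loop(unsigned int flags)
{
	printf("prefer_busy=%d end_on_resched=%d\n",
	       !!(flags & POLL_F_PREFER_BUSY),
	       !!(flags & POLL_F_END_ON_RESCHED));
}

/* Public wrappers keep the old bool-based signatures. */
static void poll_loop_classic(bool prefer_busy)
{
	poll_loop(prefer_busy ? POLL_F_PREFER_BUSY : 0);
}

static void poll_loop_rcu(bool prefer_busy)
{
	unsigned int flags = POLL_F_END_ON_RESCHED;

	if (prefer_busy)
		flags |= POLL_F_PREFER_BUSY;
	poll_loop(flags);
}

int main(void)
{
	poll_loop_classic(true);
	poll_loop_rcu(false);
	return 0;
}
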
}
EXPORT_SYMBOL(dev_set_mac_address);
-static DECLARE_RWSEM(dev_addr_sem);
+DECLARE_RWSEM(dev_addr_sem);
int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
struct netlink_ext_ack *extack)
/* Delayed registration/unregisteration */
LIST_HEAD(net_todo_list);
DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
+atomic_t dev_unreg_count = ATOMIC_INIT(0);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
- atomic_inc(&dev_net(dev)->dev_unreg_count);
}
static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
goto err_ifindex_release;
ret = netdev_register_kobject(dev);
- write_lock(&dev_base_lock);
- dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
- write_unlock(&dev_base_lock);
+
+ WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED);
+
if (ret)
goto err_uninit_notify;
* that need to tie several hardware interfaces to a single NAPI
* poll scheduler due to HW limitations.
*/
-int init_dummy_netdev(struct net_device *dev)
+void init_dummy_netdev(struct net_device *dev)
{
/* Clear everything. Note we don't initialize spinlocks
	 * as they aren't supposed to be taken by any of the
	 * because users of this 'device' don't need to change
* its refcount.
*/
-
- return 0;
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);
{
struct net_device *dev, *tmp;
struct list_head list;
+ int cnt;
#ifdef CONFIG_LOCKDEP
struct list_head unlink_list;
continue;
}
- write_lock(&dev_base_lock);
- dev->reg_state = NETREG_UNREGISTERED;
- write_unlock(&dev_base_lock);
+ WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED);
linkwatch_sync_dev(dev);
}
+ cnt = 0;
while (!list_empty(&list)) {
dev = netdev_wait_allrefs_any(&list);
list_del(&dev->todo_list);
if (dev->needs_free_netdev)
free_netdev(dev);
- if (atomic_dec_and_test(&dev_net(dev)->dev_unreg_count))
- wake_up(&netdev_unregistering_wq);
+ cnt++;
/* Free network device */
kobject_put(&dev->dev.kobj);
}
+ if (cnt && atomic_sub_and_test(cnt, &dev_unreg_count))
+ wake_up(&netdev_unregistering_wq);
}
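
netdev_run_todo() now counts released devices locally and issues one atomic_sub_and_test() against the global dev_unreg_count, waking the unregistering waitqueue only when the counter hits zero. A hedged C11 sketch of that batched subtract-and-test; the helper below mirrors the semantics and is not the kernel function:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int unreg_count;

/* Returns true when the subtraction brought the counter to zero,
 * mirroring the kernel's atomic_sub_and_test().
 */
static bool sub_and_test(atomic_int *v, int n)
{
	return atomic_fetch_sub(v, n) - n == 0;
}

int main(void)
{
	int done = 0;

	atomic_store(&unreg_count, 3);		/* three pending devices */

	for (int i = 0; i < 3; i++)
		done++;				/* release them one by one */

	if (done && sub_and_test(&unreg_count, done))
		printf("all unregistrations done, wake waiters\n");
	return 0;
}
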
/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
}
BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
- dev->reg_state = NETREG_RELEASED;
+ WRITE_ONCE(dev->reg_state, NETREG_RELEASED);
/* will free via device release */
put_device(&dev->dev);
{
struct net_device *dev, *tmp;
LIST_HEAD(close_head);
+ int cnt = 0;
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
- write_lock(&dev_base_lock);
- unlist_netdevice(dev, false);
- dev->reg_state = NETREG_UNREGISTERING;
- write_unlock(&dev_base_lock);
+ unlist_netdevice(dev);
+ WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
}
flush_all_backlogs();
list_for_each_entry(dev, head, unreg_list) {
netdev_put(dev, &dev->dev_registered_tracker);
net_set_todo(dev);
+ cnt++;
}
+ atomic_add(cnt, &dev_unreg_count);
list_del(head);
}
dev_close(dev);
/* And unlink it from device chain */
- unlist_netdevice(dev, true);
+ unlist_netdevice(dev);
synchronize_net();
snprintf(fb_name, IFNAMSIZ, "dev%%d");
netdev_for_each_altname_safe(dev, name_node, tmp)
- if (netdev_name_in_use(&init_net, name_node->name)) {
- netdev_name_node_del(name_node);
- synchronize_rcu();
+ if (netdev_name_in_use(&init_net, name_node->name))
__netdev_name_node_alt_destroy(name_node);
- }
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160);
/* TXRX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
- CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30);
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 38);
/* RX read-mostly hotpath */
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);
*
*/
+/* We allocate 256 pages for each CPU if PAGE_SHIFT is 12 */
+#define SYSTEM_PERCPU_PAGE_POOL_SIZE ((1 << 20) / PAGE_SIZE)
+
+static int net_page_pool_create(int cpuid)
+{
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+ struct page_pool_params page_pool_params = {
+ .pool_size = SYSTEM_PERCPU_PAGE_POOL_SIZE,
+ .nid = NUMA_NO_NODE,
+ };
+ struct page_pool *pp_ptr;
+
+ pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
+ if (IS_ERR(pp_ptr))
+ return -ENOMEM;
+
+ per_cpu(system_page_pool, cpuid) = pp_ptr;
+#endif
+ return 0;
+}
+
/*
* This is called single threaded during boot, so no need
* to take the rtnl semaphore.
init_gro_hash(&sd->backlog);
sd->backlog.poll = process_backlog;
sd->backlog.weight = weight_p;
+
+ if (net_page_pool_create(i))
+ goto out;
}
dev_boot_phase = 0;
WARN_ON(rc < 0);
rc = 0;
out:
+ if (rc < 0) {
+ for_each_possible_cpu(i) {
+ struct page_pool *pp_ptr;
+
+ pp_ptr = per_cpu(system_page_pool, i);
+ if (!pp_ptr)
+ continue;
+
+ page_pool_destroy(pp_ptr);
+ per_cpu(system_page_pool, i) = NULL;
+ }
+ }
+
return rc;
}
*/
static void rtnl_lock_unregistering_all(void)
{
- struct net *net;
- bool unregistering;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(&netdev_unregistering_wq, &wait);
for (;;) {
- unregistering = false;
rtnl_lock();
/* We held write locked pernet_ops_rwsem, and parallel
* setup_net() and cleanup_net() are not possible.
*/
- for_each_net(net) {
- if (atomic_read(&net->dev_unreg_count) > 0) {
- unregistering = true;
- break;
- }
- }
- if (!unregistering)
+ if (!atomic_read(&dev_unreg_count))
break;
__rtnl_unlock();
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
+void netdev_set_operstate(struct net_device *dev, int newstate)
+{
+ unsigned int old = READ_ONCE(dev->operstate);
+
+ do {
+ if (old == newstate)
+ return;
+ } while (!try_cmpxchg(&dev->operstate, &old, newstate));
+
+ netdev_state_change(dev);
+}
+EXPORT_SYMBOL(netdev_set_operstate);
+
static void set_operstate(struct net_device *dev, unsigned char transition)
{
- unsigned char operstate = dev->operstate;
+ unsigned char operstate = READ_ONCE(dev->operstate);
switch (transition) {
case IF_OPER_UP:
break;
}
- if (dev->operstate != operstate) {
- write_lock(&dev_base_lock);
- dev->operstate = operstate;
- write_unlock(&dev_base_lock);
- netdev_state_change(dev);
- }
+ netdev_set_operstate(dev, operstate);
}
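
netdev_set_operstate() swaps the dev_base_lock-protected update for a try_cmpxchg() loop: readers see a tear-free value, and the notifier runs only when the state actually changed. Roughly the same logic with userspace C11 atomics, as a hedged sketch (set_state()/notify() are made-up names, not kernel APIs):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int operstate;

static void notify(unsigned int newstate)
{
	printf("state changed to %u\n", newstate);
}

/* Returns without notifying if the state is already 'newstate'. */
static void set_state(unsigned int newstate)
{
	unsigned int old = atomic_load(&operstate);

	do {
		if (old == newstate)
			return;
	} while (!atomic_compare_exchange_weak(&operstate, &old, newstate));

	notify(newstate);
}

int main(void)
{
	set_state(6);	/* notifies */
	set_state(6);	/* no change, no notification */
	return 0;
}
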
static unsigned int rtnl_dev_get_flags(const struct net_device *dev)
static size_t rtnl_prop_list_size(const struct net_device *dev)
{
struct netdev_name_node *name_node;
- size_t size;
+ unsigned int cnt = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(name_node, &dev->name_node->list, list)
+ cnt++;
+ rcu_read_unlock();
- if (list_empty(&dev->name_node->list))
+ if (!cnt)
return 0;
- size = nla_total_size(0);
- list_for_each_entry(name_node, &dev->name_node->list, list)
- size += nla_total_size(ALTIFNAMSIZ);
- return size;
+
+ return nla_total_size(0) + cnt * nla_total_size(ALTIFNAMSIZ);
}
static size_t rtnl_proto_down_size(const struct net_device *dev)
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct rtnl_link_ops *kind_ops = NULL;
struct netlink_ext_ack *extack = cb->extack;
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- struct net *tgt_net = net;
- int h, s_h;
- int idx = 0, s_idx;
- struct net_device *dev;
- struct hlist_head *head;
+ unsigned int flags = NLM_F_MULTI;
struct nlattr *tb[IFLA_MAX+1];
+ struct {
+ unsigned long ifindex;
+ } *ctx = (void *)cb->ctx;
+ struct net *tgt_net = net;
u32 ext_filter_mask = 0;
- const struct rtnl_link_ops *kind_ops = NULL;
- unsigned int flags = NLM_F_MULTI;
+ struct net_device *dev;
int master_idx = 0;
int netnsid = -1;
int err, i;
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
err = rtnl_valid_dump_ifinfo_req(nlh, cb->strict_check, tb, extack);
if (err < 0) {
if (cb->strict_check)
flags |= NLM_F_DUMP_FILTERED;
walk_entries:
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &tgt_net->dev_index_head[h];
- hlist_for_each_entry(dev, head, index_hlist) {
- if (link_dump_filtered(dev, master_idx, kind_ops))
- goto cont;
- if (idx < s_idx)
- goto cont;
- err = rtnl_fill_ifinfo(skb, dev, net,
- RTM_NEWLINK,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq, 0, flags,
- ext_filter_mask, 0, NULL, 0,
- netnsid, GFP_KERNEL);
-
- if (err < 0) {
- if (likely(skb->len))
- goto out;
-
- goto out_err;
- }
-cont:
- idx++;
+ err = 0;
+ for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
+ if (link_dump_filtered(dev, master_idx, kind_ops))
+ continue;
+ err = rtnl_fill_ifinfo(skb, dev, net, RTM_NEWLINK,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq, 0, flags,
+ ext_filter_mask, 0, NULL, 0,
+ netnsid, GFP_KERNEL);
+ if (err < 0) {
+ if (likely(skb->len))
+ err = skb->len;
+ break;
}
}
-out:
- err = skb->len;
-out_err:
- cb->args[1] = idx;
- cb->args[0] = h;
cb->seq = tgt_net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
if (netnsid >= 0)
if (tb[IFLA_LINKMODE]) {
unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]);
- write_lock(&dev_base_lock);
if (dev->link_mode ^ value)
status |= DO_SETLINK_NOTIFY;
- dev->link_mode = value;
- write_unlock(&dev_base_lock);
+ WRITE_ONCE(dev->link_mode, value);
}
if (tb[IFLA_VFINFO_LIST]) {
return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
-static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
+ ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops,
+ dev_to_kill);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
- .exit_batch = ipgre_exit_batch_net,
+ .exit_batch_rtnl = ipgre_exit_batch_rtnl,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
};
return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
-static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
+ ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops,
+ dev_to_kill);
}
static struct pernet_operations ipgre_tap_net_ops = {
.init = ipgre_tap_init_net,
- .exit_batch = ipgre_tap_exit_batch_net,
+ .exit_batch_rtnl = ipgre_tap_exit_batch_rtnl,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
};
&erspan_link_ops, "erspan0");
}
-static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
+static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
+ ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops,
+ dev_to_kill);
}
static struct pernet_operations erspan_net_ops = {
.init = erspan_init_net,
- .exit_batch = erspan_exit_batch_net,
+ .exit_batch_rtnl = erspan_exit_batch_rtnl,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
};
module_init(ipgre_init);
module_exit(ipgre_fini);
+ MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
inet->inet_dport,
inet->inet_sport,
sk->sk_protocol,
- RT_CONN_FLAGS_TOS(sk, tos),
+ RT_TOS(tos),
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
+ bool paged, hold_tskey, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
- bool paged, extra_uref = false;
u32 tskey = 0;
skb = skb_peek_tail(queue);
mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
paged = !!cork->gso_size;
- if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
-
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
cork->length += length;
+ hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
+ if (hold_tskey)
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+
/* So, what's going on in the loop below?
*
* We use calculated fragment length to generate chained skb,
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
+ if (hold_tskey)
+ atomic_dec(&sk->sk_tskey);
return err;
}
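
The ip_append_data() hunk defers the atomic_inc_return() on sk_tskey until after the cheap failure checks and gives the key back with atomic_dec() on the error path, so an aborted call no longer consumes a timestamp key. A hedged C sketch of that reserve-late/roll-back pattern; it assumes, as the kernel does via the socket lock, that calls for one counter user are serialized, and all names are illustrative:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint next_key;

/* Hypothetical append routine: reserve the key only once the cheap
 * failure checks have passed, and release it if the real work fails.
 */
static int append(size_t len, bool want_key, bool simulate_failure)
{
	unsigned int key = 0;
	bool hold_key = false;

	if (len == 0)
		return -1;		/* fail before touching the counter */

	if (want_key) {
		key = atomic_fetch_add(&next_key, 1);
		hold_key = true;
	}

	if (simulate_failure) {
		if (hold_key)
			atomic_fetch_sub(&next_key, 1);	/* roll back */
		return -1;
	}

	printf("appended %zu bytes, key=%u\n", len, key);
	return 0;
}

int main(void)
{
	append(16, true, false);	/* gets key 0 */
	append(16, true, true);		/* key 1 is rolled back */
	append(16, true, false);	/* still gets key 1 */
	return 0;
}
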
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else
- cand = t;
+ cand = t;
}
hlist_for_each_entry_rcu(t, head, hash_node) {
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
!(t->dev->flags & IFF_UP))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- link == t->parms.link &&
+ link == READ_ONCE(t->parms.link) &&
type == t->dev->type &&
ip_tunnel_key_match(&t->parms, flags, key))
break;
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos),
- dev_net(dev), tunnel->parms.link,
+ dev_net(dev), READ_ONCE(tunnel->parms.link),
tunnel->fwmark, skb_get_hash(skb), 0);
if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
if (t->parms.link != p->link || t->fwmark != fwmark) {
int mtu;
- t->parms.link = p->link;
+ WRITE_ONCE(t->parms.link, p->link);
t->fwmark = fwmark;
mtu = ip_tunnel_bind_dev(dev);
if (set_mtu)
int ip_tunnel_get_iflink(const struct net_device *dev)
{
- struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct ip_tunnel *tunnel = netdev_priv(dev);
- return tunnel->parms.link;
+ return READ_ONCE(tunnel->parms.link);
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
}
void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
- struct rtnl_link_ops *ops)
+ struct rtnl_link_ops *ops,
+ struct list_head *dev_to_kill)
{
struct ip_tunnel_net *itn;
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
itn = net_generic(net, id);
- ip_tunnel_destroy(net, itn, &list, ops);
+ ip_tunnel_destroy(net, itn, dev_to_kill, ops);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
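
ip_tunnel_delete_nets() no longer takes rtnl_lock() or calls unregister_netdevice_many() itself; with the new exit_batch_rtnl hooks it only appends the doomed devices to the caller-provided dev_to_kill list, and the core tears the whole batch down once while RTNL is already held. A hedged sketch of that collect-then-destroy-once pattern (plain singly linked lists, illustrative names, no locking shown):

#include <stdio.h>
#include <stdlib.h>

struct device {
	int id;
	struct device *next;
};

/* Per-namespace step: only *collects* devices; no locking or freeing here.
 * The caller is assumed to hold the big lock (RTNL in the kernel case).
 */
static void collect_devices(struct device **netns_devs, struct device **to_kill)
{
	while (*netns_devs) {
		struct device *dev = *netns_devs;

		*netns_devs = dev->next;
		dev->next = *to_kill;
		*to_kill = dev;
	}
}

/* One teardown pass for everything that was collected. */
static void kill_devices(struct device *to_kill)
{
	while (to_kill) {
		struct device *next = to_kill->next;

		printf("unregistering dev %d\n", to_kill->id);
		free(to_kill);
		to_kill = next;
	}
}

int main(void)
{
	struct device *ns1 = NULL, *ns2 = NULL, *to_kill = NULL;

	for (int i = 0; i < 2; i++) {
		struct device *d = malloc(sizeof(*d));

		d->id = i;
		d->next = ns1;
		ns1 = d;
	}
	collect_devices(&ns1, &to_kill);
	collect_devices(&ns2, &to_kill);	/* empty namespace: no-op */
	kill_devices(to_kill);
	return 0;
}
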
if (tunnel->collect_md)
netif_keep_dst(dev);
+ netdev_lockdep_set_classes(dev);
return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
+ MODULE_DESCRIPTION("IPv4 tunnel implementation library");
MODULE_LICENSE("GPL");
return 0;
}
-static void __net_exit vti_exit_batch_net(struct list_head *list_net)
+static void __net_exit vti_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
+ ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops,
+ dev_to_kill);
}
static struct pernet_operations vti_net_ops = {
.init = vti_init_net,
- .exit_batch = vti_exit_batch_net,
+ .exit_batch_rtnl = vti_exit_batch_rtnl,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
};
module_init(vti_init);
module_exit(vti_fini);
+ MODULE_DESCRIPTION("Virtual (secure) IP tunneling library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("vti");
MODULE_ALIAS_NETDEV("ip_vti0");
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
}
-static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
+static void __net_exit ipip_exit_batch_rtnl(struct list_head *list_net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
+ ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops,
+ dev_to_kill);
}
static struct pernet_operations ipip_net_ops = {
.init = ipip_init_net,
- .exit_batch = ipip_exit_batch_net,
+ .exit_batch_rtnl = ipip_exit_batch_rtnl,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
};
module_init(ipip_init);
module_exit(ipip_fini);
+ MODULE_DESCRIPTION("IP/IP protocol decoder library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ipip");
MODULE_ALIAS_NETDEV("tunl0");
return err;
}
netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
}
return err;
}
-static void __net_exit sit_exit_batch_net(struct list_head *net_list)
+static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- LIST_HEAD(list);
struct net *net;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- sit_destroy_tunnels(net, &list);
-
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ sit_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
- .exit_batch = sit_exit_batch_net,
+ .exit_batch_rtnl = sit_exit_batch_rtnl,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
};
module_init(sit_init);
module_exit(sit_cleanup);
+ MODULE_DESCRIPTION("IPv6-in-IPv4 tunnel SIT driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("sit");
MODULE_ALIAS_NETDEV("sit0");
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX);
- opts->ahmac = add_addr_generate_hmac(msk->local_key,
- msk->remote_key,
+ opts->ahmac = add_addr_generate_hmac(READ_ONCE(msk->local_key),
+ READ_ONCE(msk->remote_key),
&opts->addr);
} else {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
*size = TCPOLEN_MPTCP_FASTCLOSE;
opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
- opts->rcvr_key = msk->remote_key;
+ opts->rcvr_key = READ_ONCE(msk->remote_key);
pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
- subflow->fully_established = 1;
- WRITE_ONCE(msk->fully_established, true);
- goto check_notify;
+ goto set_fully_established;
}
/* If the first established packet does not contain MP_CAPABLE + data
set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
- mptcp_subflow_fully_established(subflow, mp_opt);
+
+ mptcp_data_lock((struct sock *)msk);
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
+ mptcp_data_unlock((struct sock *)msk);
check_notify:
/* if the subflow is not already linked into the conn_list, we can't
static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
{
msk->bytes_acked += new_snd_una - msk->snd_una;
- msk->snd_una = new_snd_una;
+ WRITE_ONCE(msk->snd_una, new_snd_una);
}
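
Fields such as snd_una, wnd_end and ack_seq are now written with WRITE_ONCE() because the option-parsing path reads them locklessly with READ_ONCE(). In userspace C11, the closest analogue of those marked accesses is a relaxed atomic store/load, sketched below (this illustrates the intent, it is not the kernel macros themselves):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* One writer updates the sequence number under its own lock; readers in
 * another context only need a tear-free snapshot, not ordering.
 */
static _Atomic uint64_t snd_una;

static void writer_update(uint64_t new_snd_una)
{
	/* WRITE_ONCE() equivalent */
	atomic_store_explicit(&snd_una, new_snd_una, memory_order_relaxed);
}

static uint64_t reader_peek(void)
{
	/* READ_ONCE() equivalent */
	return atomic_load_explicit(&snd_una, memory_order_relaxed);
}

int main(void)
{
	writer_update(1000);
	printf("snd_una=%llu\n", (unsigned long long)reader_peek());
	return 0;
}
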
static void ack_update_msk(struct mptcp_sock *msk,
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
if (after64(new_wnd_end, msk->wnd_end))
- msk->wnd_end = new_wnd_end;
+ WRITE_ONCE(msk->wnd_end, new_wnd_end);
/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
- if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
+ if (after64(msk->wnd_end, snd_nxt))
__mptcp_check_push(sk, ssk);
if (after64(new_snd_una, old_snd_una)) {
trace_ack_update_msk(mp_opt->data_ack,
old_snd_una, new_snd_una,
- new_wnd_end, msk->wnd_end);
+ new_wnd_end, READ_ONCE(msk->wnd_end));
}
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
if (mp_opt->echo)
return true;
- hmac = add_addr_generate_hmac(msk->remote_key,
- msk->local_key,
+ hmac = add_addr_generate_hmac(READ_ONCE(msk->remote_key),
+ READ_ONCE(msk->local_key),
&mp_opt->addr);
pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) {
if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) &&
- msk->local_key == mp_opt.rcvr_key) {
+ READ_ONCE(msk->local_key) == mp_opt.rcvr_key) {
WRITE_ONCE(msk->rcv_fastclose, true);
mptcp_schedule_work((struct sock *)msk);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSERX);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
}
+/* called under the msk socket lock */
static bool mptcp_pending_data_fin_ack(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
}
}
+/* can be called with no lock acquired */
static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
{
struct mptcp_sock *msk = mptcp_sk(sk);
if (READ_ONCE(msk->rcv_data_fin) &&
- ((1 << sk->sk_state) &
+ ((1 << inet_sk_state_load(sk)) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
- if (msk->ack_seq == rcv_data_fin_seq) {
+ if (READ_ONCE(msk->ack_seq) == rcv_data_fin_seq) {
if (seq)
*seq = rcv_data_fin_seq;
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
msk->bytes_received += end_seq - msk->ack_seq;
- msk->ack_seq = end_seq;
+ WRITE_ONCE(msk->ack_seq, end_seq);
moved = true;
}
return moved;
put_page(dfrag->page);
}
+/* called under both the msk socket lock and the data lock */
static void __mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
msk->recovery = false;
out:
- if (snd_una == READ_ONCE(msk->snd_nxt) &&
- snd_una == READ_ONCE(msk->write_seq)) {
+ if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) {
if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
mptcp_stop_rtx_timer(sk);
} else {
mptcp_reset_rtx_timer(sk);
}
+
+ if (mptcp_pending_data_fin_ack(sk))
+ mptcp_schedule_work(sk);
}
static void __mptcp_clean_una_wakeup(struct sock *sk)
*/
if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
- msk->snd_nxt = snd_nxt_new;
+ WRITE_ONCE(msk->snd_nxt, snd_nxt_new);
}
}
void mptcp_check_and_set_pending(struct sock *sk)
{
- if (mptcp_send_head(sk))
- mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
+ if (mptcp_send_head(sk)) {
+ mptcp_data_lock(sk);
+ mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING);
+ mptcp_data_unlock(sk);
+ }
}
static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
if (copied <= 0)
return;
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, msk->first);
+
msk->rcvq_space.copied += copied;
mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
skb = skb_peek(&msk->receive_queue);
if (skb) {
- u64 hint_val = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;
+ u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq;
if (hint_val >= INT_MAX)
return INT_MAX;
__skb_queue_head_init(&msk->receive_queue);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
- msk->rmem_fwd_alloc = 0;
+ WRITE_ONCE(msk->rmem_fwd_alloc, 0);
WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
sk->sk_prot->destroy(sk);
- WARN_ON_ONCE(msk->rmem_fwd_alloc);
+ WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc));
WARN_ON_ONCE(msk->rmem_released);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
- msk->push_pending = 0;
msk->recovery = false;
- msk->can_ack = false;
- msk->fully_established = false;
- msk->rcv_data_fin = false;
- msk->snd_data_fin_enable = false;
- msk->rcv_fastclose = false;
- msk->use_64bit_ack = false;
- msk->bytes_consumed = 0;
+ WRITE_ONCE(msk->can_ack, false);
+ WRITE_ONCE(msk->fully_established, false);
+ WRITE_ONCE(msk->rcv_data_fin, false);
+ WRITE_ONCE(msk->snd_data_fin_enable, false);
+ WRITE_ONCE(msk->rcv_fastclose, false);
+ WRITE_ONCE(msk->use_64bit_ack, false);
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
+ msk->bytes_consumed = 0;
msk->bytes_acked = 0;
msk->bytes_received = 0;
msk->bytes_sent = 0;
msk->bytes_retrans = 0;
+ msk->rcvspace_init = 0;
WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
if (!nsk)
__mptcp_init_sock(nsk);
msk = mptcp_sk(nsk);
- msk->local_key = subflow_req->local_key;
- msk->token = subflow_req->token;
+ WRITE_ONCE(msk->local_key, subflow_req->local_key);
+ WRITE_ONCE(msk->token, subflow_req->token);
msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
- msk->write_seq = subflow_req->idsn + 1;
- msk->snd_nxt = msk->write_seq;
- msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1);
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+ WRITE_ONCE(msk->snd_una, msk->write_seq);
+ WRITE_ONCE(msk->wnd_end, msk->snd_nxt + req->rsk_rcv_wnd);
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched);
/* The msk maintain a ref to each subflow in the connections list */
WRITE_ONCE(msk->first, ssk);
- list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
+ subflow = mptcp_subflow_ctx(ssk);
+ list_add(&subflow->node, &msk->conn_list);
sock_hold(ssk);
/* new mpc subflow takes ownership of the newly
__mptcp_propagate_sndbuf(nsk, ssk);
mptcp_rcv_space_init(msk, ssk);
+
+ if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
bh_unlock_sock(nsk);
/* note: the newly allocated socket refcount is 2 now */
{
const struct tcp_sock *tp = tcp_sk(ssk);
+ msk->rcvspace_init = 1;
msk->rcvq_space.copied = 0;
msk->rcvq_space.rtt_us = 0;
TCP_INIT_CWND * tp->advmss);
if (msk->rcvq_space.space == 0)
msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
-
- WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
__mptcp_clean_una(sk);
else
__set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
-
- if (mptcp_pending_data_fin_ack(sk))
- mptcp_schedule_work(sk);
}
void __mptcp_check_push(struct sock *sk, struct sock *ssk)
struct mptcp_sock *msk = mptcp_sk(sk);
for (;;) {
- unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
- msk->push_pending;
+ unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED);
struct list_head join_list;
if (!flags)
	 * datapath acquires the msk socket spinlock while holding
* the subflow socket lock
*/
- msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
* accessing the field below
*/
WRITE_ONCE(msk->local_key, subflow->local_key);
- WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
- WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- WRITE_ONCE(msk->snd_una, msk->write_seq);
mptcp_pm_new_connection(msk, ssk, 0);
-
- mptcp_rcv_space_init(msk, ssk);
}
void mptcp_sock_graft(struct sock *sk, struct socket *parent)
struct mptcp_sock {
/* inet_connection_sock must be the first member */
struct inet_connection_sock sk;
- u64 local_key;
- u64 remote_key;
+ u64 local_key; /* protected by the first subflow socket lock
+					 * lockless read access
+ */
+ u64 remote_key; /* same as above */
u64 write_seq;
u64 bytes_sent;
u64 snd_nxt;
int rmem_released;
unsigned long flags;
unsigned long cb_flags;
- unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
nodelay:1,
fastopening:1,
in_accept_queue:1,
- free_first:1;
+ free_first:1,
+ rcvspace_init:1;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
{
struct mptcp_sock *msk = mptcp_sk(sk);
- if (msk->snd_una == READ_ONCE(msk->snd_nxt))
+ if (msk->snd_una == msk->snd_nxt)
return NULL;
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
- void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
- void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+ void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req);
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) &&
+ return (1 << sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
is_active_ssk(subflow) &&
!subflow->conn_finished;
}
get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
- subflow_generate_hmac(msk->local_key, msk->remote_key,
+ subflow_generate_hmac(READ_ONCE(msk->local_key),
+ READ_ONCE(msk->remote_key),
subflow_req->local_nonce,
subflow_req->remote_nonce, hmac);
void __mptcp_sync_state(struct sock *sk, int state)
{
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct sock *ssk = msk->first;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ __mptcp_propagate_sndbuf(sk, ssk);
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, ssk);
- __mptcp_propagate_sndbuf(sk, msk->first);
if (sk->sk_state == TCP_SYN_SENT) {
+	/* subflow->idsn is always available in TCP_SYN_SENT state,
+ * even for the FASTOPEN scenarios
+ */
+ WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
mptcp_set_state(sk, state);
sk->sk_state_change(sk);
}
}
- static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
- {
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk)) {
- __mptcp_sync_state(sk, ssk->sk_state);
- } else {
- msk->pending_state = ssk->sk_state;
- __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
- }
- mptcp_data_unlock(sk);
- }
-
static void subflow_set_remote_key(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
}
+ static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+ {
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_data_lock(sk);
+ if (mp_opt) {
+		/* Options are available only in the non-fallback cases;
+		 * avoid updating rx path fields otherwise.
+ */
+ WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
+ WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+ subflow_set_remote_key(msk, subflow, mp_opt);
+ }
+
+ if (!sock_owned_by_user(sk)) {
+ __mptcp_sync_state(sk, ssk->sk_state);
+ } else {
+ msk->pending_state = ssk->sk_state;
+ __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
+ }
+ mptcp_data_unlock(sk);
+ }
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
if (mp_opt.deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
- subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
}
} else if (mptcp_check_fallback(sk)) {
fallback:
- mptcp_rcv_space_init(msk, sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
return;
if (!msk)
return false;
- subflow_generate_hmac(msk->remote_key, msk->local_key,
+ subflow_generate_hmac(READ_ONCE(msk->remote_key),
+ READ_ONCE(msk->local_key),
subflow_req->remote_nonce,
subflow_req->local_nonce, hmac);
kfree_rcu(ctx, rcu);
}
- void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
- mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
+ __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* mpc option
*/
if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
- mptcp_subflow_fully_established(ctx, &mp_opt);
mptcp_pm_fully_established(owner, child);
ctx->pm_notified = 1;
}
mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
subflow->remote_key_valid = 1;
- subflow->remote_key = msk->remote_key;
- subflow->local_key = msk->local_key;
+ subflow->remote_key = READ_ONCE(msk->remote_key);
+ subflow->local_key = READ_ONCE(msk->local_key);
subflow->token = msk->token;
mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_do_fallback(sk);
- mptcp_rcv_space_init(msk, sk);
pr_fallback(msk);
subflow->conn_finished = 1;
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
/* as recvmsg() does not acquire the subflow socket for ssk selection
* net_device are already rcu protected.
*/
RCU_INIT_POINTER(m->tcfm_dev, NULL);
- } else if (m->tcfm_blockid) {
- m->tcfm_blockid = 0;
}
spin_unlock_bh(&m->tcf_lock);
}
.size = sizeof(struct tcf_mirred),
.get_dev = tcf_mirred_get_dev,
};
+MODULE_ALIAS_NET_ACT("mirred");
static __net_init int mirred_init_net(struct net *net)
{
smc->clcsock->file->private_data = smc->clcsock;
smc->clcsock->wq.fasync_list =
smc->sk.sk_socket->wq.fasync_list;
+ smc->sk.sk_socket->wq.fasync_list = NULL;
/* There might be some wait entries remaining
* in smc sk->sk_wq and they should be woken up
int rc = SMC_CLC_DECL_NOSMCDDEV;
struct smcd_dev *smcd;
int i = 1, entry = 1;
- bool is_virtual;
+ bool is_emulated;
u16 chid;
if (smcd_indicated(ini->smc_type_v1))
chid = smc_ism_get_chid(smcd);
if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
continue;
- is_virtual = __smc_ism_is_virtual(chid);
+ is_emulated = __smc_ism_is_emulated(chid);
if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
- if (is_virtual && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
+ if (is_emulated && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
/* It's the last GID-CHID entry left in CLC
- * Proposal SMC-Dv2 extension, but a virtual
+ * Proposal SMC-Dv2 extension, but an Emulated-
* ISM device will take two entries. So give
				 * it up and try the next potential ISM device.
*/
ini->is_smcd = true;
rc = 0;
i++;
- entry = is_virtual ? entry + 2 : entry + 1;
+ entry = is_emulated ? entry + 2 : entry + 1;
if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES)
break;
}
if (rc)
return rc;
- if (__smc_ism_is_virtual(ini->ism_chid[ini->ism_selected]))
+ if (__smc_ism_is_emulated(ini->ism_chid[ini->ism_selected]))
ini->ism_peer_gid[ini->ism_selected].gid_ext =
ntohll(aclc->d1.gid_ext);
- /* for non-virtual ISM devices, peer gid_ext remains 0. */
+ /* for non-Emulated-ISM devices, peer gid_ext remains 0. */
}
ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);
if (smc_ism_get_chid(smcd) == proposed_chid &&
!smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
ini->ism_peer_gid[*matches].gid = proposed_gid->gid;
- if (__smc_ism_is_virtual(proposed_chid))
+ if (__smc_ism_is_emulated(proposed_chid))
ini->ism_peer_gid[*matches].gid_ext =
proposed_gid->gid_ext;
- /* non-virtual ISM's peer gid_ext remains 0. */
+ /* non-Emulated-ISM's peer gid_ext remains 0. */
ini->ism_dev[*matches] = smcd;
(*matches)++;
break;
smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid);
smcd_gid.gid_ext = 0;
chid = ntohs(smcd_v2_ext->gidchid[i].chid);
- if (__smc_ism_is_virtual(chid)) {
+ if (__smc_ism_is_emulated(chid)) {
if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt ||
chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid))
- /* each virtual ISM device takes two GID-CHID
+ /* each Emulated-ISM device takes two GID-CHID
* entries and CHID of the second entry repeats
* that of the first entry.
*
#include <net/scm.h>
#include <net/tcp_states.h>
-#include "scm.h"
+struct unix_sock *unix_get_socket(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+
+ /* Socket ? */
+ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
+ struct socket *sock = SOCKET_I(inode);
+ const struct proto_ops *ops;
+ struct sock *sk = sock->sk;
+
+ ops = READ_ONCE(sock->ops);
-/* Internal data structures and random procedures: */
+ /* PF_UNIX ? */
+ if (sk && ops && ops->family == PF_UNIX)
+ return unix_sk(sk);
+ }
+
+ return NULL;
+}
+DEFINE_SPINLOCK(unix_gc_lock);
+unsigned int unix_tot_inflight;
static LIST_HEAD(gc_candidates);
-static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
+static LIST_HEAD(gc_inflight_list);
+
+/* Keep the in-flight count up to date for the file descriptor
+ * if it refers to an AF_UNIX socket.
+ */
+void unix_inflight(struct user_struct *user, struct file *filp)
+{
+ struct unix_sock *u = unix_get_socket(filp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (u) {
+ if (!u->inflight) {
+ WARN_ON_ONCE(!list_empty(&u->link));
+ list_add_tail(&u->link, &gc_inflight_list);
+ } else {
+ WARN_ON_ONCE(list_empty(&u->link));
+ }
+ u->inflight++;
+
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
+ }
+
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
+
+ spin_unlock(&unix_gc_lock);
+}
+
+void unix_notinflight(struct user_struct *user, struct file *filp)
+{
+ struct unix_sock *u = unix_get_socket(filp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (u) {
+ WARN_ON_ONCE(!u->inflight);
+ WARN_ON_ONCE(list_empty(&u->link));
+
+ u->inflight--;
+ if (!u->inflight)
+ list_del_init(&u->link);
+
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
+ }
+
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
+
+ spin_unlock(&unix_gc_lock);
+}
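
unix_inflight()/unix_notinflight() above keep a plain per-socket inflight counter under unix_gc_lock and tie list membership to the zero crossing: the socket joins gc_inflight_list on the 0->1 transition and leaves it on 1->0. A hedged sketch of that counting scheme, with the list reduced to a boolean and no locking shown:

#include <stdbool.h>
#include <stdio.h>

struct tracked {
	int inflight;
	bool on_list;	/* stand-in for !list_empty(&u->link) */
};

static void track_inflight(struct tracked *t)
{
	if (!t->inflight)
		t->on_list = true;	/* 0 -> 1: join the GC candidate list */
	t->inflight++;
}

static void track_notinflight(struct tracked *t)
{
	t->inflight--;
	if (!t->inflight)
		t->on_list = false;	/* 1 -> 0: leave the list */
}

int main(void)
{
	struct tracked sock = { 0, false };

	track_inflight(&sock);
	track_inflight(&sock);
	track_notinflight(&sock);
	printf("inflight=%d on_list=%d\n", sock.inflight, sock.on_list);
	track_notinflight(&sock);
	printf("inflight=%d on_list=%d\n", sock.inflight, sock.on_list);
	return 0;
}
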
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
struct sk_buff_head *hitlist)
while (nfd--) {
/* Get the socket the fd matches if it indeed does so */
- struct sock *sk = unix_get_socket(*fp++);
-
- if (sk) {
- struct unix_sock *u = unix_sk(sk);
+ struct unix_sock *u = unix_get_socket(*fp++);
- /* Ignore non-candidates, they could
- * have been added to the queues after
- * starting the garbage collection
- */
- if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
- hit = true;
+ /* Ignore non-candidates, they could have been added
+ * to the queues after starting the garbage collection
+ */
+ if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
+ hit = true;
- func(u);
- }
+ func(u);
}
}
if (hit && hitlist != NULL) {
/* An embryo cannot be in-flight, so it's safe
* to use the list link.
*/
- BUG_ON(!list_empty(&u->link));
+ WARN_ON_ONCE(!list_empty(&u->link));
list_add_tail(&u->link, &embryos);
}
spin_unlock(&x->sk_receive_queue.lock);
static void dec_inflight(struct unix_sock *usk)
{
- atomic_long_dec(&usk->inflight);
+ usk->inflight--;
}
static void inc_inflight(struct unix_sock *usk)
{
- atomic_long_inc(&usk->inflight);
+ usk->inflight++;
}
static void inc_inflight_move_tail(struct unix_sock *u)
{
- atomic_long_inc(&u->inflight);
+ u->inflight++;
+
/* If this still might be part of a cycle, move it to the end
* of the list, so that it's checked even if it was already
* passed over
}
static bool gc_in_progress;
-#define UNIX_INFLIGHT_TRIGGER_GC 16000
-
-void wait_for_unix_gc(void)
-{
- /* If number of inflight sockets is insane,
- * force a garbage collect right now.
- * Paired with the WRITE_ONCE() in unix_inflight(),
- * unix_notinflight() and gc_in_progress().
- */
- if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
- !READ_ONCE(gc_in_progress))
- unix_gc();
- wait_event(unix_gc_wait, gc_in_progress == false);
-}
-/* The external entry point: unix_gc() */
-void unix_gc(void)
+static void __unix_gc(struct work_struct *work)
{
- struct sk_buff *next_skb, *skb;
- struct unix_sock *u;
- struct unix_sock *next;
struct sk_buff_head hitlist;
- struct list_head cursor;
+ struct unix_sock *u, *next;
LIST_HEAD(not_cycle_list);
+ struct list_head cursor;
spin_lock(&unix_gc_lock);
- /* Avoid a recursive GC. */
- if (gc_in_progress)
- goto out;
-
- /* Paired with READ_ONCE() in wait_for_unix_gc(). */
- WRITE_ONCE(gc_in_progress, true);
-
/* First, select candidates for garbage collection. Only
* in-flight sockets are considered, and from those only ones
* which don't have any external reference.
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
long total_refs;
- long inflight_refs;
total_refs = file_count(u->sk.sk_socket->file);
- inflight_refs = atomic_long_read(&u->inflight);
- BUG_ON(inflight_refs < 1);
- BUG_ON(total_refs < inflight_refs);
- if (total_refs == inflight_refs) {
+ WARN_ON_ONCE(!u->inflight);
+ WARN_ON_ONCE(total_refs < u->inflight);
+ if (total_refs == u->inflight) {
list_move_tail(&u->link, &gc_candidates);
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
/* Move cursor to after the current position. */
list_move(&cursor, &u->link);
- if (atomic_long_read(&u->inflight) > 0) {
+ if (u->inflight) {
list_move_tail(&u->link, ¬_cycle_list);
__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
scan_children(&u->sk, inc_inflight_move_tail, NULL);
spin_unlock(&unix_gc_lock);
- /* We need io_uring to clean its registered files, ignore all io_uring
- * originated skbs. It's fine as io_uring doesn't keep references to
- * other io_uring instances and so killing all other files in the cycle
- * will put all io_uring references forcing it to go through normal
- * release.path eventually putting registered files.
- */
- skb_queue_walk_safe(&hitlist, skb, next_skb) {
- if (skb->destructor == io_uring_destruct_scm) {
- __skb_unlink(skb, &hitlist);
- skb_queue_tail(&skb->sk->sk_receive_queue, skb);
- }
- }
-
/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- list_for_each_entry_safe(u, next, &gc_candidates, link) {
- struct sk_buff *skb = u->oob_skb;
+ while (!list_empty(&gc_candidates)) {
+ u = list_entry(gc_candidates.next, struct unix_sock, link);
+ if (u->oob_skb) {
+ struct sk_buff *skb = u->oob_skb;
- if (skb) {
u->oob_skb = NULL;
kfree_skb(skb);
}
spin_lock(&unix_gc_lock);
- /* There could be io_uring registered files, just push them back to
- * the inflight list
- */
- list_for_each_entry_safe(u, next, &gc_candidates, link)
- list_move_tail(&u->link, &gc_inflight_list);
-
/* All candidates should have been detached by now. */
- BUG_ON(!list_empty(&gc_candidates));
+ WARN_ON_ONCE(!list_empty(&gc_candidates));
/* Paired with READ_ONCE() in wait_for_unix_gc(). */
WRITE_ONCE(gc_in_progress, false);
- wake_up(&unix_gc_wait);
-
- out:
spin_unlock(&unix_gc_lock);
}
+
+static DECLARE_WORK(unix_gc_work, __unix_gc);
+
+void unix_gc(void)
+{
+ WRITE_ONCE(gc_in_progress, true);
+ queue_work(system_unbound_wq, &unix_gc_work);
+}
+
+#define UNIX_INFLIGHT_TRIGGER_GC 16000
+#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)
+
+void wait_for_unix_gc(struct scm_fp_list *fpl)
+{
+ /* If number of inflight sockets is insane,
+ * force a garbage collect right now.
+ *
+ * Paired with the WRITE_ONCE() in unix_inflight(),
+ * unix_notinflight(), and __unix_gc().
+ */
+ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+ !READ_ONCE(gc_in_progress))
+ unix_gc();
+
+ /* Penalise users who want to send AF_UNIX sockets
+ * but whose sockets have not been received yet.
+ */
+ if (!fpl || !fpl->count_unix ||
+ READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
+ return;
+
+ if (READ_ONCE(gc_in_progress))
+ flush_work(&unix_gc_work);
+}
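
wait_for_unix_gc() now only kicks the workqueue-based GC when the global in-flight count exceeds UNIX_INFLIGHT_TRIGGER_GC, and it blocks (via flush_work()) only senders whose own unreceived in-flight sockets reach UNIX_INFLIGHT_SANE_USER. A hedged sketch of that two-threshold decision; start_gc()/flush_gc() are stubs, and the 253 below is SCM_MAX_FD from the kernel headers:

#include <stdbool.h>
#include <stdio.h>

#define INFLIGHT_TRIGGER_GC	16000
#define INFLIGHT_SANE_USER	(253 * 8)	/* SCM_MAX_FD * 8 in the patch */

static bool gc_running;

static void start_gc(void) { gc_running = true; }	/* queue_work() stand-in */
static void flush_gc(void) { gc_running = false; }	/* flush_work() stand-in */

static void maybe_wait_for_gc(unsigned int total_inflight,
			      unsigned int user_inflight, bool sending_fds)
{
	if (total_inflight > INFLIGHT_TRIGGER_GC && !gc_running)
		start_gc();		/* kick async GC, do not wait */

	/* Only heavy offenders are made to wait for the GC to finish. */
	if (sending_fds && user_inflight >= INFLIGHT_SANE_USER && gc_running)
		flush_gc();
}

int main(void)
{
	maybe_wait_for_gc(20000, 10, true);	/* triggers GC, returns */
	maybe_wait_for_gc(20000, 4096, true);	/* waits for GC to finish */
	printf("gc_running=%d\n", gc_running);
	return 0;
}
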
memcpy(&p->sel, &x->sel, sizeof(p->sel));
memcpy(&p->lft, &x->lft, sizeof(p->lft));
if (x->xso.dev)
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
put_unaligned(x->stats.replay_window, &p->stats.replay_window);
put_unaligned(x->stats.replay, &p->stats.replay);
module_init(xfrm_user_init);
module_exit(xfrm_user_exit);
+ MODULE_DESCRIPTION("XFRM User interface");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM);
ct_connect_v4 ip4-ct-xon: Basic ipv4 tcp connection using ct
connect_v4 ip4-xon: Basic ipv4 ping between two NS
nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT
+ nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces
drop_reason drop: test drop reasons are emitted"
return 0
}
+# nat_related_v4 test
+# - client->server ip packets go via SNAT
+# - client solicits ICMP destination unreachable packet from server
+# - undo NAT for ICMP reply and test dst ip has been updated
+test_nat_related_v4 () {
+ which nc >/dev/null 2>/dev/null || return $ksft_skip
+
+ sbx_add "test_nat_related_v4" || return $?
+
+ ovs_add_dp "test_nat_related_v4" natrelated4 || return 1
+ info "create namespaces"
+ for ns in client server; do
+ ovs_add_netns_and_veths "test_nat_related_v4" "natrelated4" "$ns" \
+ "${ns:0:1}0" "${ns:0:1}1" || return 1
+ done
+
+ ip netns exec client ip addr add 172.31.110.10/24 dev c1
+ ip netns exec client ip link set c1 up
+ ip netns exec server ip addr add 172.31.110.20/24 dev s1
+ ip netns exec server ip link set s1 up
+
+ ip netns exec server ip route add 192.168.0.20/32 via 172.31.110.10
+
+ # Allow ARP
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "in_port(1),eth(),eth_type(0x0806),arp()" "2" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "in_port(2),eth(),eth_type(0x0806),arp()" "1" || return 1
+
+ # Allow IP traffic from client->server, rewrite source IP with SNAT to 192.168.0.20
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=172.31.110.20)" \
+ "ct(commit,nat(src=192.168.0.20)),recirc(0x1)" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" \
+ "2" || return 1
+
+ # Allow related ICMP responses back from server and undo NAT to restore original IP
+ # Drop any ICMP related packets where dst ip hasn't been restored back to original IP
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \
+ "ct(commit,nat),recirc(0x2)" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,dst=172.31.110.10,proto=1),icmp()" \
+ "1" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20,proto=1),icmp()" \
+ "drop" || return 1
+
+ # Solicit destination unreachable response from server
+ ovs_sbx "test_nat_related_v4" ip netns exec client \
+ bash -c "echo a | nc -u -w 1 172.31.110.20 10000"
+
+ # Check to make sure no packets matched the drop rule with incorrect dst ip
+ python3 "$ovs_base/ovs-dpctl.py" dump-flows natrelated4 \
+ | grep "drop" | grep "packets:0" >/dev/null || return 1
+
+ info "done..."
+ return 0
+}
+
# netlink_validation
# - Create a dp
# - check no warning with "old version" simulation
wc -l) == 2 ] || \
return 1
+ info "Checking clone depth"
ERR_MSG="Flow actions may not be safe on all matching packets"
+ PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
+ ovs_add_flow "test_netlink_checks" nv0 \
+ 'in_port(1),eth(),eth_type(0x800),ipv4()' \
+ 'clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(drop)))))))))))))))))' \
+ >/dev/null 2>&1 && return 1
+ POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
+
+ if [ "$PRE_TEST" == "$POST_TEST" ]; then
+ info "failed - clone depth too large"
+ return 1
+ fi
+
PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
ovs_add_flow "test_netlink_checks" nv0 \
'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \