Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

author David S. Miller <davem@davemloft.net>

Tue, 22 Aug 2017 00:06:42 +0000 (17:06 -0700)

committer David S. Miller <davem@davemloft.net>

Tue, 22 Aug 2017 00:06:42 +0000 (17:06 -0700)
author David S. Miller <davem@davemloft.net>
Tue, 22 Aug 2017 00:06:42 +0000 (17:06 -0700)
committer David S. Miller <davem@davemloft.net>
Tue, 22 Aug 2017 00:06:42 +0000 (17:06 -0700)
diff --combined MAINTAINERS

index 0e967b3ca1c612ad4b3e82d29a6a2cb26102df63,1c3feffb1c1cfd2b46685907300a9f026fb67e6a..11e1bcec9cbbe8c5840fead4a0e6e93e84a33538
--- 1/MAINTAINERS
--- 2/MAINTAINERS
+++ b/MAINTAINERS
@@@ -2477,7 -2477,7 +2477,7 @@@ Q:      https://patchwork.open-mesh.org/proj
   S:    Maintained
   F:    Documentation/ABI/testing/sysfs-class-net-batman-adv
   F:    Documentation/ABI/testing/sysfs-class-net-mesh
- -F:    Documentation/networking/batman-adv.txt
+ +F:    Documentation/networking/batman-adv.rst
   F:    include/uapi/linux/batman_adv.h
   F:    net/batman-adv/
   
@@@ -5101,7 -5101,6 +5101,7 @@@ F:      include/linux/of_net.
   F:    include/linux/phy.h
   F:    include/linux/phy_fixed.h
   F:    include/linux/platform_data/mdio-gpio.h
+ +F:    include/linux/platform_data/mdio-bcm-unimac.h
   F:    include/trace/events/mdio.h
   F:    include/uapi/linux/mdio.h
   F:    include/uapi/linux/mii.h
@@@ -6148,14 -6147,6 +6148,14 @@@ S:    Maintaine
   F:    drivers/net/ethernet/hisilicon/
   F:    Documentation/devicetree/bindings/net/hisilicon*.txt
   
+ +HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3)
+ +M:    Yisen Zhuang <yisen.zhuang@huawei.com>
+ +M:    Salil Mehta <salil.mehta@huawei.com>
+ +L:    netdev@vger.kernel.org
+ +W:    http://www.hisilicon.com
+ +S:    Maintained
+ +F:    drivers/net/ethernet/hisilicon/hns3/
+ +
   HISILICON ROCE DRIVER
   M:    Lijun Ou <oulijun@huawei.com>
   M:    Wei Hu(Xavier) <xavier.huwei@huawei.com>
@@@ -6266,7 -6257,6 +6266,7 @@@ M:      Haiyang Zhang <haiyangz@microsoft.co
   M:    Stephen Hemminger <sthemmin@microsoft.com>
   L:    devel@linuxdriverproject.org
   S:    Maintained
+ +F:    Documentation/networking/netvsc.txt
   F:    arch/x86/include/asm/mshyperv.h
   F:    arch/x86/include/uapi/asm/hyperv.h
   F:    arch/x86/kernel/cpu/mshyperv.c
@@@ -7120,7 -7110,6 +7120,6 @@@ M:      Marc Zyngier <marc.zyngier@arm.com
   L:    linux-kernel@vger.kernel.org
   S:    Maintained
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
- T:    git git://git.infradead.org/users/jcooper/linux.git irqchip/core
   F:    Documentation/devicetree/bindings/interrupt-controller/
   F:    drivers/irqchip/
   
@@@ -8434,9 -8423,7 +8433,9 @@@ F:      include/uapi/linux/uvcvideo.
   
   MEDIATEK ETHERNET DRIVER
   M:    Felix Fietkau <nbd@openwrt.org>
- -M:    John Crispin <blogic@openwrt.org>
+ +M:    John Crispin <john@phrozen.org>
+ +M:    Sean Wang <sean.wang@mediatek.com>
+ +M:    Nelson Chang <nelson.chang@mediatek.com>
   L:    netdev@vger.kernel.org
   S:    Maintained
   F:    drivers/net/ethernet/mediatek/
diff --combined drivers/net/ethernet/mellanox/mlx4/main.c

index 3797491f4b6bfb7da67e1be2cacceaa5b5c0d069,5fe5cdc5135776abb8fd4df748b0948520504b48..9ea2b0db62290b015dd457c76f14295c67923270
--- 1/drivers/net/ethernet/mellanox/mlx4/main.c
--- 2/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@@ -432,7 -432,7 +432,7 @@@ static int mlx4_dev_cap(struct mlx4_de
                 /* Virtual PCI function needs to determine UAR page size from
                  * firmware. Only master PCI function can set the uar page size
                  */
-               if (enable_4k_uar)
+               if (enable_4k_uar || !dev->persist->num_vfs)
                         dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
                 else
                         dev->uar_page_shift = PAGE_SHIFT;
@@@ -925,10 -925,10 +925,10 @@@ static int mlx4_slave_cap(struct mlx4_d
         mlx4_replace_zero_macs(dev);
   
         dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
- -      dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
- -      dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
- -      dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
- -      dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ +      dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
+ +      dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
+ +      dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
+ +      dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
   
         if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
             !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
@@@ -2277,7 -2277,7 +2277,7 @@@ static int mlx4_init_hca(struct mlx4_de
   
                 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
   
-               if (enable_4k_uar) {
+               if (enable_4k_uar || !dev->persist->num_vfs) {
                         init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
                                                     PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT;
                         init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
@@@ -2399,7 -2399,7 +2399,7 @@@
                 dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
         }
         priv->eq_table.inta_pin = adapter.inta_pin;
- -      memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
+ +      memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
   
         return 0;
   
@@@ -2869,7 -2869,7 +2869,7 @@@ static void mlx4_enable_msi_x(struct ml
                                 dev->caps.num_eqs - dev->caps.reserved_eqs,
                                 MAX_MSIX);
   
- -              entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
+ +              entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
                 if (!entries)
                         goto no_msi;
   
@@@ -3782,6 -3782,7 +3782,6 @@@ err_release_regions
   
   err_disable_pdev:
         mlx4_pci_disable_device(&priv->dev);
- -      pci_set_drvdata(pdev, NULL);
         return err;
   }
   
@@@ -3996,6 -3997,7 +3996,6 @@@ static void mlx4_remove_one(struct pci_
         devlink_unregister(devlink);
         kfree(dev->persist);
         devlink_free(devlink);
- -      pci_set_drvdata(pdev, NULL);
   }
   
   static int restore_current_port_types(struct mlx4_dev *dev,
diff --combined drivers/net/ethernet/netronome/nfp/nfp_net_common.c

index 4a990033c4d5702dd9ca55862aae0ec997b2973c,9f77ce038a4a339260e0118bb6a9f61e60547870..732f1d315fba55dbbca9b6fc6118f08cc6bc676b
--- 1/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
--- 2/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@@ -908,8 -908,7 +908,7 @@@ static int nfp_net_tx(struct sk_buff *s
         return NETDEV_TX_OK;
   
   err_unmap:
-       --f;
-       while (f >= 0) {
+       while (--f >= 0) {
                 frag = &skb_shinfo(skb)->frags[f];
                 dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
                                skb_frag_size(frag), DMA_TO_DEVICE);
@@@ -2660,7 -2659,6 +2659,7 @@@ static int nfp_net_netdev_close(struct 
         /* Step 2: Tell NFP
          */
         nfp_net_clear_config_and_disable(nn);
+ +      nfp_port_configure(netdev, false);
   
         /* Step 3: Free resources
          */
@@@ -2778,21 -2776,16 +2777,21 @@@ static int nfp_net_netdev_open(struct n
                 goto err_free_all;
   
         /* Step 2: Configure the NFP
+ +       * - Ifup the physical interface if it exists
          * - Enable rings from 0 to tx_rings/rx_rings - 1.
          * - Write MAC address (in case it changed)
          * - Set the MTU
          * - Set the Freelist buffer size
          * - Enable the FW
          */
- -      err = nfp_net_set_config_and_enable(nn);
+ +      err = nfp_port_configure(netdev, true);
         if (err)
                 goto err_free_all;
   
+ +      err = nfp_net_set_config_and_enable(nn);
+ +      if (err)
+ +              goto err_port_disable;
+ +
         /* Step 3: Enable for kernel
          * - put some freelist descriptors on each RX ring
          * - enable NAPI on each ring
@@@ -2803,8 -2796,6 +2802,8 @@@
   
         return 0;
   
+ +err_port_disable:
+ +      nfp_port_configure(netdev, false);
   err_free_all:
         nfp_net_close_free_all(nn);
         return err;
diff --combined drivers/net/tun.c

index 19cbbbb1b63bbd74fcf7e74faf6541b3b7472b65,0a2c0a42283f780b947fcf52fcb4220e73f0fa2f..06e8f0bb2dab07b01c8d10737fc9cf30c489bd56
--- 1/drivers/net/tun.c
--- 2/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@@ -73,8 -73,6 +73,8 @@@
   #include <linux/seq_file.h>
   #include <linux/uio.h>
   #include <linux/skb_array.h>
+ +#include <linux/bpf.h>
+ +#include <linux/bpf_trace.h>
   
   #include <linux/uaccess.h>
   
@@@ -107,9 -105,6 +107,9 @@@ do {                                                               
   } while (0)
   #endif
   
+ +#define TUN_HEADROOM 256
+ +#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD + TUN_HEADROOM)
+ +
   /* TUN device flags */
   
   /* IFF_ATTACH_QUEUE is never stored in device flags,
@@@ -204,7 -199,7 +204,7 @@@ struct tun_struct 
         struct net_device       *dev;
         netdev_features_t       set_features;
   #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
- -                        NETIF_F_TSO6|NETIF_F_UFO)
+ +                        NETIF_F_TSO6)
   
         int                     align;
         int                     vnet_hdr_sz;
@@@ -226,7 -221,6 +226,7 @@@
         u32 flow_count;
         u32 rx_batched;
         struct tun_pcpu_stats __percpu *pcpu_stats;
+ +      struct bpf_prog __rcu *xdp_prog;
   };
   
   #ifdef CONFIG_TUN_VNET_CROSS_LE
@@@ -591,7 -585,6 +591,7 @@@ static void tun_detach(struct tun_file 
   static void tun_detach_all(struct net_device *dev)
   {
         struct tun_struct *tun = netdev_priv(dev);
+ +      struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog);
         struct tun_file *tfile, *tmp;
         int i, n = tun->numqueues;
   
@@@ -624,9 -617,6 +624,9 @@@
         }
         BUG_ON(tun->numdisabled != 0);
   
+ +      if (xdp_prog)
+ +              bpf_prog_put(xdp_prog);
+ +
         if (tun->flags & IFF_PERSIST)
                 module_put(THIS_MODULE);
   }
@@@ -902,7 -892,7 +902,7 @@@ static netdev_tx_t tun_net_xmit(struct 
             sk_filter(tfile->socket.sk, skb))
                 goto drop;
   
- -      if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+ +      if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
                 goto drop;
   
         skb_tx_timestamp(skb);
@@@ -1013,46 -1003,6 +1013,46 @@@ tun_net_get_stats64(struct net_device *
         stats->tx_dropped = tx_dropped;
   }
   
+ +/* Publish @prog as the device's XDP program and drop the reference held on
+ + * the program it replaces, if any. @extack is not used. Always returns 0.
+ + * NOTE(review): rtnl_dereference() implies the caller holds RTNL - confirm.
+ + */
+ +static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ +                     struct netlink_ext_ack *extack)
+ +{
+ +      struct tun_struct *tun = netdev_priv(dev);
+ +      struct bpf_prog *prev = rtnl_dereference(tun->xdp_prog);
+ +
+ +      rcu_assign_pointer(tun->xdp_prog, prog);
+ +
+ +      if (prev)
+ +              bpf_prog_put(prev);
+ +
+ +      return 0;
+ +}
+ +
+ +/* Return the id of the attached XDP program, or 0 when none is attached. */
+ +static u32 tun_xdp_query(struct net_device *dev)
+ +{
+ +      struct tun_struct *tun = netdev_priv(dev);
+ +      const struct bpf_prog *prog = rtnl_dereference(tun->xdp_prog);
+ +
+ +      return prog ? prog->aux->id : 0;
+ +}
+ +
+ +/* Dispatch an XDP command: XDP_SETUP_PROG installs xdp->prog,
+ + * XDP_QUERY_PROG fills in prog_id/prog_attached, anything else is -EINVAL.
+ + */
+ +static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+ +{
+ +      if (xdp->command == XDP_SETUP_PROG)
+ +              return tun_xdp_set(dev, xdp->prog, xdp->extack);
+ +
+ +      if (xdp->command != XDP_QUERY_PROG)
+ +              return -EINVAL;
+ +
+ +      xdp->prog_id = tun_xdp_query(dev);
+ +      xdp->prog_attached = !!xdp->prog_id;
+ +
+ +      return 0;
+ +}
+ +
   static const struct net_device_ops tun_netdev_ops = {
         .ndo_uninit             = tun_net_uninit,
         .ndo_open               = tun_net_open,
@@@ -1083,7 -1033,6 +1083,7 @@@ static const struct net_device_ops tap_
         .ndo_features_check     = passthru_features_check,
         .ndo_set_rx_headroom    = tun_set_headroom,
         .ndo_get_stats64        = tun_net_get_stats64,
+ +      .ndo_xdp                = tun_xdp,
   };
   
   static void tun_flow_init(struct tun_struct *tun)
@@@ -1241,128 -1190,6 +1241,128 @@@ static void tun_rx_batched(struct tun_s
         }
   }
   
+ +/* Decide whether a received packet may take the build_skb() path.
+ + *
+ + * That path is taken only for a TAP device whose socket send buffer is
+ + * INT_MAX, on a non-blocking, non-zerocopy write, and only when the padded
+ + * packet plus struct skb_shared_info fits in one page.
+ + */
+ +static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
+ +                            int len, int noblock, bool zerocopy)
+ +{
+ +      if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP ||
+ +          tfile->socket.sk->sk_sndbuf != INT_MAX ||
+ +          !noblock || zerocopy)
+ +              return false;
+ +
+ +      return SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+ +             SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) <= PAGE_SIZE;
+ +}
+ +
+ +/* Build an skb in place on the current task's page fragment.
+ + *
+ + * Copies @len bytes from @from into the fragment behind TUN_RX_PAD bytes of
+ + * headroom. If an XDP program is attached and @hdr carries no GSO type, the
+ + * program runs on the raw buffer before any skb exists. *@generic_xdp is set
+ + * to 1 when @hdr has a GSO type (native XDP is skipped) and to 0 otherwise.
+ + *
+ + * Returns the new skb, ERR_PTR(-ENOMEM) if the fragment refill or
+ + * build_skb() fails, ERR_PTR(-EFAULT) on a short copy, or NULL when XDP
+ + * consumed the packet (redirected, transmitted or dropped).
+ + *
+ + * Every exit taken after rcu_read_lock() must drop the read lock again.
+ + */
+ +static struct sk_buff *tun_build_skb(struct tun_struct *tun,
+ +                                   struct tun_file *tfile,
+ +                                   struct iov_iter *from,
+ +                                   struct virtio_net_hdr *hdr,
+ +                                   int len, int *generic_xdp)
+ +{
+ +      struct page_frag *alloc_frag = &current->task_frag;
+ +      struct sk_buff *skb;
+ +      struct bpf_prog *xdp_prog;
+ +      int buflen = SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+ +                   SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ +      unsigned int delta = 0;
+ +      char *buf;
+ +      size_t copied;
+ +      bool xdp_xmit = false;
+ +      int err;
+ +
+ +      if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
+ +              return ERR_PTR(-ENOMEM);
+ +
+ +      buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+ +      copied = copy_page_from_iter(alloc_frag->page,
+ +                                   alloc_frag->offset + TUN_RX_PAD,
+ +                                   len, from);
+ +      if (copied != len)
+ +              return ERR_PTR(-EFAULT);
+ +
+ +      if (hdr->gso_type)
+ +              *generic_xdp = 1;
+ +      else
+ +              *generic_xdp = 0;
+ +
+ +      rcu_read_lock();
+ +      xdp_prog = rcu_dereference(tun->xdp_prog);
+ +      if (xdp_prog && !*generic_xdp) {
+ +              struct xdp_buff xdp;
+ +              void *orig_data;
+ +              u32 act;
+ +
+ +              xdp.data_hard_start = buf;
+ +              xdp.data = buf + TUN_RX_PAD;
+ +              xdp.data_end = xdp.data + len;
+ +              orig_data = xdp.data;
+ +              act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ +
+ +              switch (act) {
+ +              case XDP_REDIRECT:
+ +                      get_page(alloc_frag->page);
+ +                      alloc_frag->offset += buflen;
+ +                      err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
+ +                      if (err)
+ +                              goto err_redirect;
+ +                      /* Packet handed off: leave the RCU section too. */
+ +                      rcu_read_unlock();
+ +                      return NULL;
+ +              case XDP_TX:
+ +                      xdp_xmit = true;
+ +                      /* fall through */
+ +              case XDP_PASS:
+ +                      /* Account for the program moving xdp.data. */
+ +                      delta = orig_data - xdp.data;
+ +                      break;
+ +              default:
+ +                      bpf_warn_invalid_xdp_action(act);
+ +                      /* fall through */
+ +              case XDP_ABORTED:
+ +                      trace_xdp_exception(tun->dev, xdp_prog, act);
+ +                      /* fall through */
+ +              case XDP_DROP:
+ +                      goto err_xdp;
+ +              }
+ +      }
+ +
+ +      skb = build_skb(buf, buflen);
+ +      if (!skb) {
+ +              rcu_read_unlock();
+ +              return ERR_PTR(-ENOMEM);
+ +      }
+ +
+ +      skb_reserve(skb, TUN_RX_PAD - delta);
+ +      skb_put(skb, len + delta);
+ +      get_page(alloc_frag->page);
+ +      alloc_frag->offset += buflen;
+ +
+ +      if (xdp_xmit) {
+ +              skb->dev = tun->dev;
+ +              generic_xdp_tx(skb, xdp_prog);
+ +              /* Balance the rcu_read_lock() taken above. */
+ +              rcu_read_unlock();
+ +              return NULL;
+ +      }
+ +
+ +      rcu_read_unlock();
+ +
+ +      return skb;
+ +
+ +err_redirect:
+ +      put_page(alloc_frag->page);
+ +err_xdp:
+ +      rcu_read_unlock();
+ +      this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ +      return NULL;
+ +}
+ +
   /* Get packet from user space buffer */
   static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
                             void *msg_control, struct iov_iter *from,
@@@ -1379,7 -1206,6 +1379,7 @@@
         bool zerocopy = false;
         int err;
         u32 rxhash;
+ +      int generic_xdp = 1;
   
         if (!(tun->dev->flags & IFF_UP))
                 return -EIO;
@@@ -1437,40 -1263,30 +1437,40 @@@
                         zerocopy = true;
         }
   
- -      if (!zerocopy) {
- -              copylen = len;
- -              if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
- -                      linear = good_linear;
- -              else
- -                      linear = tun16_to_cpu(tun, gso.hdr_len);
- -      }
- -
- -      skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
- -      if (IS_ERR(skb)) {
- -              if (PTR_ERR(skb) != -EAGAIN)
+ +      if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
+ +              skb = tun_build_skb(tun, tfile, from, &gso, len, &generic_xdp);
+ +              if (IS_ERR(skb)) {
                         this_cpu_inc(tun->pcpu_stats->rx_dropped);
- -              return PTR_ERR(skb);
- -      }
+ +                      return PTR_ERR(skb);
+ +              }
+ +              if (!skb)
+ +                      return total_len;
+ +      } else {
+ +              if (!zerocopy) {
+ +                      copylen = len;
+ +                      if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
+ +                              linear = good_linear;
+ +                      else
+ +                              linear = tun16_to_cpu(tun, gso.hdr_len);
+ +              }
   
- -      if (zerocopy)
- -              err = zerocopy_sg_from_iter(skb, from);
- -      else
- -              err = skb_copy_datagram_from_iter(skb, 0, from, len);
+ +              skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
+ +              if (IS_ERR(skb)) {
+ +                      if (PTR_ERR(skb) != -EAGAIN)
+ +                              this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ +                      return PTR_ERR(skb);
+ +              }
   
- -      if (err) {
- -              this_cpu_inc(tun->pcpu_stats->rx_dropped);
- -              kfree_skb(skb);
- -              return -EFAULT;
+ +              if (zerocopy)
+ +                      err = zerocopy_sg_from_iter(skb, from);
+ +              else
+ +                      err = skb_copy_datagram_from_iter(skb, 0, from, len);
+ +
+ +              if (err) {
+ +                      this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ +                      kfree_skb(skb);
+ +                      return -EFAULT;
+ +              }
         }
   
         if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
@@@ -1518,22 -1334,6 +1518,22 @@@
         skb_reset_network_header(skb);
         skb_probe_transport_header(skb, 0);
   
+ +      if (generic_xdp) {
+ +              struct bpf_prog *xdp_prog;
+ +              int ret;
+ +
+ +              rcu_read_lock();
+ +              xdp_prog = rcu_dereference(tun->xdp_prog);
+ +              if (xdp_prog) {
+ +                      ret = do_xdp_generic(xdp_prog, skb);
+ +                      if (ret != XDP_PASS) {
+ +                              rcu_read_unlock();
+ +                              return total_len;
+ +                      }
+ +              }
+ +              rcu_read_unlock();
+ +      }
+ +
         rxhash = __skb_get_hash_symmetric(skb);
   #ifndef CONFIG_4KSTACKS
         tun_rx_batched(tun, tfile, skb, more);
@@@ -2079,6 -1879,9 +2079,9 @@@ static int tun_set_iff(struct net *net
   
   err_detach:
         tun_detach_all(dev);
+       /* register_netdevice() already called tun_free_netdev() */
+       goto err_free_dev;
+ 
   err_free_flow:
         tun_flow_uninit(tun);
         security_tun_dev_free_security(tun->security);
@@@ -2121,6 -1924,11 +2124,6 @@@ static int set_offload(struct tun_struc
                                 features |= NETIF_F_TSO6;
                         arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
                 }
- -
- -              if (arg & TUN_F_UFO) {
- -                      features |= NETIF_F_UFO;
- -                      arg &= ~TUN_F_UFO;
- -              }
         }
   
         /* This gives the user a way to test for new features in future by
@@@ -2732,7 -2540,7 +2735,7 @@@ static int tun_queue_resize(struct tun_
         int n = tun->numqueues + tun->numdisabled;
         int ret, i;
   
- -      arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
+ +      arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL);
         if (!arrays)
                 return -ENOMEM;
   
diff --combined include/net/ip.h

index 39db596eb89fc346c549945482582f0abc89b6f0,0cf7f5a65fe6be2be30259aa5cd251d02de489d5..9896f46cbbf11235395d75a5ec18a14736ee099d
--- 1/include/net/ip.h
--- 2/include/net/ip.h
+++ b/include/net/ip.h
@@@ -78,16 -78,6 +78,16 @@@ struct ipcm_cookie 
   #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
   #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))
   
+ +/* Enslaved device index for @skb, or 0 when it does not apply (NULL skb,
+ + * skb not flagged as l3mdev, or CONFIG_NET_L3_MASTER_DEV disabled).
+ + */
+ +static inline int inet_sdif(struct sk_buff *skb)
+ +{
+ +      int sdif = 0;
+ +
+ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ +      if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
+ +              sdif = IPCB(skb)->iif;
+ +#endif
+ +      return sdif;
+ +}
+ +
   struct ip_ra_chain {
         struct ip_ra_chain __rcu *next;
         struct sock             *sk;
@@@ -362,7 -352,7 +362,7 @@@ static inline unsigned int ip_dst_mtu_m
             !forwarding)
                 return dst_mtu(dst);
   
-       return min(dst->dev->mtu, IP_MAX_MTU);
+       return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU);
   }
   
   static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
@@@ -374,7 -364,7 +374,7 @@@
                 return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
         }
   
-       return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
+       return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
   }
   
   u32 ip_idents_reserve(u32 hash, int segs);
@@@ -577,12 -567,11 +577,12 @@@ int ip_forward(struct sk_buff *skb)
   void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
                       __be32 daddr, struct rtable *rt, int is_frag);
   
- -int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
- -                    const struct ip_options *sopt);
- -static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
+ +int __ip_options_echo(struct net *net, struct ip_options *dopt,
+ +                    struct sk_buff *skb, const struct ip_options *sopt);
+ +static inline int ip_options_echo(struct net *net, struct ip_options *dopt,
+ +                                struct sk_buff *skb)
   {
- -      return __ip_options_echo(dopt, skb, &IPCB(skb)->opt);
+ +      return __ip_options_echo(net, dopt, skb, &IPCB(skb)->opt);
   }
   
   void ip_options_fragment(struct sk_buff *skb);
diff --combined include/net/sch_generic.h

index 107c5243224507144d53363b04a303ecf3397090,67f815e5d52517390226bc3531b1ea7b5f1020bc..1688f0f6c7ba9b4905682bbeb2417be8a79a98aa
--- 1/include/net/sch_generic.h
--- 2/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@@ -156,6 -156,7 +156,6 @@@ struct Qdisc_class_ops 
   
         /* Filter manipulation */
         struct tcf_block *      (*tcf_block)(struct Qdisc *, unsigned long);
- -      bool                    (*tcf_cl_offload)(u32 classid);
         unsigned long           (*bind_tcf)(struct Qdisc *, unsigned long,
                                         u32 classid);
         void                    (*unbind_tcf)(struct Qdisc *, unsigned long);
@@@ -212,16 -213,16 +212,16 @@@ struct tcf_proto_ops 
         int                     (*init)(struct tcf_proto*);
         void                    (*destroy)(struct tcf_proto*);
   
- -      unsigned long           (*get)(struct tcf_proto*, u32 handle);
+ +      void*                   (*get)(struct tcf_proto*, u32 handle);
         int                     (*change)(struct net *net, struct sk_buff *,
                                         struct tcf_proto*, unsigned long,
                                         u32 handle, struct nlattr **,
- -                                      unsigned long *, bool);
- -      int                     (*delete)(struct tcf_proto*, unsigned long, bool*);
+ +                                      void **, bool);
+ +      int                     (*delete)(struct tcf_proto*, void *, bool*);
         void                    (*walk)(struct tcf_proto*, struct tcf_walker *arg);
   
         /* rtnetlink specific */
- -      int                     (*dump)(struct net*, struct tcf_proto*, unsigned long,
+ +      int                     (*dump)(struct net*, struct tcf_proto*, void *,
                                         struct sk_buff *skb, struct tcmsg*);
   
         struct module           *owner;
@@@ -393,9 -394,6 +393,9 @@@ qdisc_class_find(const struct Qdisc_cla
         struct Qdisc_class_common *cl;
         unsigned int h;
   
+ +      if (!id)
+ +              return NULL;
+ +
         h = qdisc_class_hash(id, hash->hashmask);
         hlist_for_each_entry(cl, &hash->hash[h], hnode) {
                 if (cl->classid == id)
@@@ -808,8 -806,11 +808,11 @@@ static inline struct Qdisc *qdisc_repla
         old = *pold;
         *pold = new;
         if (old != NULL) {
-               qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog);
+               unsigned int qlen = old->q.qlen;
+               unsigned int backlog = old->qstats.backlog;
+ 
                 qdisc_reset(old);
+               qdisc_tree_reduce_backlog(old, qlen, backlog);
         }
         sch_tree_unlock(sch);
   
diff --combined include/net/sock.h

index fe1a0bc25cd3e5a2ce8fb9965df7ac532d592bdb,aeeec62992ca7dc5ff80f8d7164a1c143f606b03..1c2912d433e81b10f3fdc87bcfcbb091570edc03
--- 1/include/net/sock.h
--- 2/include/net/sock.h
+++ b/include/net/sock.h
@@@ -294,7 -294,6 +294,7 @@@ struct sock_common 
     *   @sk_stamp: time stamp of last packet received
     *   @sk_tsflags: SO_TIMESTAMPING socket options
     *   @sk_tskey: counter to disambiguate concurrent tstamp requests
+ +  *   @sk_zckey: counter to order MSG_ZEROCOPY notifications
     *   @sk_socket: Identd and reporting IO signals
     *   @sk_user_data: RPC layer private data
     *   @sk_frag: cached page frag
@@@ -463,7 -462,6 +463,7 @@@ struct sock 
         u16                     sk_tsflags;
         u8                      sk_shutdown;
         u32                     sk_tskey;
+ +      atomic_t                sk_zckey;
         struct socket           *sk_socket;
         void                    *sk_user_data;
   #ifdef CONFIG_SECURITY
@@@ -509,9 -507,7 +509,7 @@@ int sk_set_peek_off(struct sock *sk, in
   static inline int sk_peek_offset(struct sock *sk, int flags)
   {
         if (unlikely(flags & MSG_PEEK)) {
-               s32 off = READ_ONCE(sk->sk_peek_off);
-               if (off >= 0)
-                       return off;
+               return READ_ONCE(sk->sk_peek_off);
         }
   
         return 0;
@@@ -1533,8 -1529,6 +1531,8 @@@ struct sk_buff *sock_wmalloc(struct soc
                              gfp_t priority);
   void __sock_wfree(struct sk_buff *skb);
   void sock_wfree(struct sk_buff *skb);
+ +struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+ +                           gfp_t priority);
   void skb_orphan_partial(struct sk_buff *skb);
   void sock_rfree(struct sk_buff *skb);
   void sock_efree(struct sk_buff *skb);
@@@ -1586,14 -1580,11 +1584,14 @@@ int sock_no_shutdown(struct socket *, i
   int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *);
   int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int);
   int sock_no_sendmsg(struct socket *, struct msghdr *, size_t);
+ +int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len);
   int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int);
   int sock_no_mmap(struct file *file, struct socket *sock,
                  struct vm_area_struct *vma);
   ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
                          size_t size, int flags);
+ +ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
+ +                              int offset, size_t size, int flags);
   
   /*
    * Functions to fill in entries in struct proto_ops when a protocol
diff --combined kernel/events/core.c

index a7a6c1d19a4929f9879cbc2f1eef1b4434356af0,ee20d4c546b5ebc0248c084e11c483e0ef800c6f..8c01572709aca5c4144d30d2226e1deeab9e3e0e
--- 1/kernel/events/core.c
--- 2/kernel/events/core.c
+++ b/kernel/events/core.c
@@@ -2217,6 -2217,33 +2217,33 @@@ static int group_can_go_on(struct perf_
         return can_add_hw;
   }
   
+ /*
+  * Counterpart of update_event_times(): pick tstamp_* values so that the
+  * 'enabled' state resumes at @now. Whatever time elapsed between the
+  * previous tstamp_stopped and @now is dropped, because the event was OFF
+  * (or the context time base just switched) during that interval.
+  *
+  * The event is expected to be INACTIVE here (it just left OFF and cannot
+  * have been scheduled in yet), and entering INACTIVE means
+  * 'tstamp_stopped = @now'.
+  *
+  * update_event_times() computes:
+  *
+  *   total_time_enabled = tstamp_stopped - tstamp_enabled
+  *   total_time_running = tstamp_stopped - tstamp_running
+  *
+  * Substituting 'tstamp_stopped == now' and solving for the other two
+  * timestamps gives the assignments below.
+  */
+ static void __perf_event_enable_time(struct perf_event *event, u64 now)
+ {
+       WARN_ON_ONCE(event->state != PERF_EVENT_STATE_INACTIVE);
+ 
+       event->tstamp_running = now - event->total_time_running;
+       event->tstamp_enabled = now - event->total_time_enabled;
+       event->tstamp_stopped = now;
+ }
+ 
   static void add_event_to_ctx(struct perf_event *event,
                                struct perf_event_context *ctx)
   {
@@@ -2224,9 -2251,12 +2251,12 @@@
   
         list_add_event(event, ctx);
         perf_group_attach(event);
-       event->tstamp_enabled = tstamp;
-       event->tstamp_running = tstamp;
-       event->tstamp_stopped = tstamp;
+       /*
+        * We can be called with event->state == STATE_OFF when we create with
+        * .disabled = 1. In that case the IOC_ENABLE will call this function.
+        */
+       if (event->state == PERF_EVENT_STATE_INACTIVE)
+               __perf_event_enable_time(event, tstamp);
   }
   
   static void ctx_sched_out(struct perf_event_context *ctx,
@@@ -2471,10 -2501,11 +2501,11 @@@ static void __perf_event_mark_enabled(s
         u64 tstamp = perf_event_time(event);
   
         event->state = PERF_EVENT_STATE_INACTIVE;
-       event->tstamp_enabled = tstamp - event->total_time_enabled;
+       __perf_event_enable_time(event, tstamp);
         list_for_each_entry(sub, &event->sibling_list, group_entry) {
+               /* XXX should not be > INACTIVE if event isn't */
                 if (sub->state >= PERF_EVENT_STATE_INACTIVE)
-                       sub->tstamp_enabled = tstamp - sub->total_time_enabled;
+                       __perf_event_enable_time(sub, tstamp);
         }
   }
   
@@@ -5090,7 -5121,7 +5121,7 @@@ static void perf_mmap_open(struct vm_ar
                 atomic_inc(&event->rb->aux_mmap_count);
   
         if (event->pmu->event_mapped)
-               event->pmu->event_mapped(event);
+               event->pmu->event_mapped(event, vma->vm_mm);
   }
   
   static void perf_pmu_output_stop(struct perf_event *event);
@@@ -5113,7 -5144,7 +5144,7 @@@ static void perf_mmap_close(struct vm_a
         unsigned long size = perf_data_size(rb);
   
         if (event->pmu->event_unmapped)
-               event->pmu->event_unmapped(event);
+               event->pmu->event_unmapped(event, vma->vm_mm);
   
         /*
          * rb->aux_mmap_count will always drop before rb->mmap_count and
@@@ -5411,7 -5442,7 +5442,7 @@@ aux_unlock
         vma->vm_ops = &perf_mmap_vmops;
   
         if (event->pmu->event_mapped)
-               event->pmu->event_mapped(event);
+               event->pmu->event_mapped(event, vma->vm_mm);
   
         return ret;
   }
@@@ -8050,7 -8081,7 +8081,7 @@@ static void perf_event_free_bpf_handler
   
   static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
   {
- -      bool is_kprobe, is_tracepoint;
+ +      bool is_kprobe, is_tracepoint, is_syscall_tp;
         struct bpf_prog *prog;
   
         if (event->attr.type != PERF_TYPE_TRACEPOINT)
@@@ -8061,8 -8092,7 +8092,8 @@@
   
         is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
         is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
- -      if (!is_kprobe && !is_tracepoint)
+ +      is_syscall_tp = is_syscall_trace_event(event->tp_event);
+ +      if (!is_kprobe && !is_tracepoint && !is_syscall_tp)
                 /* bpf programs can only be attached to u/kprobe or tracepoint */
                 return -EINVAL;
   
@@@ -8071,14 -8101,13 +8102,14 @@@
                 return PTR_ERR(prog);
   
         if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
- -          (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
+ +          (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
+ +          (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
                 /* valid fd, but invalid bpf program type */
                 bpf_prog_put(prog);
                 return -EINVAL;
         }
   
- -      if (is_tracepoint) {
+ +      if (is_tracepoint || is_syscall_tp) {
                 int off = trace_event_get_offsets(event->tp_event);
   
                 if (prog->aux->max_ctx_offset > off) {
diff --combined net/core/datagram.c

index 2f3277945d356bd13d51b213bfbca59f70c351c1,a21ca8dee5eadca0d9ab7c78a939ac90bb3963b0..a4d5f10d83a1ca6cf9bb1e8dc6d6faeae5947e4d
--- 1/net/core/datagram.c
--- 2/net/core/datagram.c
+++ b/net/core/datagram.c
@@@ -169,14 -169,20 +169,20 @@@ struct sk_buff *__skb_try_recv_from_que
                                           int *peeked, int *off, int *err,
                                           struct sk_buff **last)
   {
+       bool peek_at_off = false;
         struct sk_buff *skb;
-       int _off = *off;
+       int _off = 0;
+ 
+       if (unlikely(flags & MSG_PEEK && *off >= 0)) {
+               peek_at_off = true;
+               _off = *off;
+       }
   
         *last = queue->prev;
         skb_queue_walk(queue, skb) {
                 if (flags & MSG_PEEK) {
-                       if (_off >= skb->len && (skb->len || _off ||
-                                                skb->peeked)) {
+                       if (peek_at_off && _off >= skb->len &&
+                           (_off || skb->peeked)) {
                                 _off -= skb->len;
                                 continue;
                         }
@@@ -573,12 -579,27 +579,12 @@@ fault
   }
   EXPORT_SYMBOL(skb_copy_datagram_from_iter);
   
- -/**
- - *    zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
- - *    @skb: buffer to copy
- - *    @from: the source to copy from
- - *
- - *    The function will first copy up to headlen, and then pin the userspace
- - *    pages and build frags through them.
- - *
- - *    Returns 0, -EFAULT or -EMSGSIZE.
- - */
- -int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+ +int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+ +                          struct iov_iter *from, size_t length)
   {
- -      int len = iov_iter_count(from);
- -      int copy = min_t(int, skb_headlen(skb), len);
- -      int frag = 0;
+ +      int frag = skb_shinfo(skb)->nr_frags;
   
- -      /* copy up to skb headlen */
- -      if (skb_copy_datagram_from_iter(skb, 0, from, copy))
- -              return -EFAULT;
- -
- -      while (iov_iter_count(from)) {
+ +      while (length && iov_iter_count(from)) {
                 struct page *pages[MAX_SKB_FRAGS];
                 size_t start;
                 ssize_t copied;
@@@ -588,24 -609,18 +594,24 @@@
                 if (frag == MAX_SKB_FRAGS)
                         return -EMSGSIZE;
   
- -              copied = iov_iter_get_pages(from, pages, ~0U,
+ +              copied = iov_iter_get_pages(from, pages, length,
                                             MAX_SKB_FRAGS - frag, &start);
                 if (copied < 0)
                         return -EFAULT;
   
                 iov_iter_advance(from, copied);
+ +              length -= copied;
   
                 truesize = PAGE_ALIGN(copied + start);
                 skb->data_len += copied;
                 skb->len += copied;
                 skb->truesize += truesize;
- -              refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+ +              if (sk && sk->sk_type == SOCK_STREAM) {
+ +                      sk->sk_wmem_queued += truesize;
+ +                      sk_mem_charge(sk, truesize);
+ +              } else {
+ +                      refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+ +              }
                 while (copied) {
                         int size = min_t(int, copied, PAGE_SIZE - start);
                         skb_fill_page_desc(skb, frag++, pages[n], start, size);
@@@ -616,28 -631,6 +622,28 @@@
         }
         return 0;
   }
+ +EXPORT_SYMBOL(__zerocopy_sg_from_iter);
+ +
+ +/**
+ + *    zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
+ + *    @skb: buffer to copy
+ + *    @from: the source to copy from
+ + *
+ + *    The function will first copy up to headlen, and then pin the userspace
+ + *    pages and build frags through them.
+ + *
+ + *    Returns 0, -EFAULT or -EMSGSIZE.
+ + */
+ +int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+ +{
+ +      int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));
+ +
+ +      /* copy up to skb headlen */
+ +      if (skb_copy_datagram_from_iter(skb, 0, from, copy))
+ +              return -EFAULT;
+ +
+ +      return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
+ +}
   EXPORT_SYMBOL(zerocopy_sg_from_iter);
   
   static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
diff --combined net/ipv4/igmp.c

index 9f86b5133605c90c58f670460994cf9986f87339,caf2f1101d027b7b6e8d9683887e16c7bd4a8438..ab183af0b5b6a8f9b7fd02b32b56d32487518f7a
--- 1/net/ipv4/igmp.c
--- 2/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@@ -1007,10 -1007,18 +1007,18 @@@ int igmp_rcv(struct sk_buff *skb
   {
         /* This basically follows the spec line by line -- see RFC1112 */
         struct igmphdr *ih;
-       struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+       struct net_device *dev = skb->dev;
+       struct in_device *in_dev;
         int len = skb->len;
         bool dropped = true;
   
+       if (netif_is_l3_master(dev)) {
+               dev = dev_get_by_index_rcu(dev_net(dev), IPCB(skb)->iif);
+               if (!dev)
+                       goto drop;
+       }
+ 
+       in_dev = __in_dev_get_rcu(dev);
         if (!in_dev)
                 goto drop;
   
@@@ -2549,8 -2557,7 +2557,8 @@@ done
   /*
    * check if a multicast source filter allows delivery for a given <src,dst,intf>
    */
- -int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
+ +int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr,
+ +                 int dif, int sdif)
   {
         struct inet_sock *inet = inet_sk(sk);
         struct ip_mc_socklist *pmc;
@@@ -2565,8 -2572,7 +2573,8 @@@
         rcu_read_lock();
         for_each_pmc_rcu(inet, pmc) {
                 if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
- -                  pmc->multi.imr_ifindex == dif)
+ +                  (pmc->multi.imr_ifindex == dif ||
+ +                   (sdif && pmc->multi.imr_ifindex == sdif)))
                         break;
         }
         ret = inet->mc_all;
diff --combined net/ipv4/route.c

index 872b4cb136d3fa0cda403836cc83a156a65310a3,2331de20ca505d7f25fe9d93d5320e9e39af6c39..94d4cd2d5ea4f4589783528d8e951d3365078bc6
--- 1/net/ipv4/route.c
--- 2/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@@ -1267,7 -1267,7 +1267,7 @@@ static unsigned int ipv4_mtu(const stru
         if (mtu)
                 return mtu;
   
-       mtu = dst->dev->mtu;
+       mtu = READ_ONCE(dst->dev->mtu);
   
         if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
                 if (rt->rt_uses_gateway && mtu > 576)
@@@ -1398,7 -1398,7 +1398,7 @@@ static void ipv4_dst_destroy(struct dst
         struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
         struct rtable *rt = (struct rtable *) dst;
   
- -      if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+ +      if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
                 kfree(p);
   
         if (!list_empty(&rt->rt_uncached)) {
@@@ -1456,7 -1456,7 +1456,7 @@@ static void rt_set_nexthop(struct rtabl
                 dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
                 if (fi->fib_metrics != &dst_default_metrics) {
                         rt->dst._metrics |= DST_METRICS_REFCOUNTED;
- -                      atomic_inc(&fi->fib_metrics->refcnt);
+ +                      refcount_inc(&fi->fib_metrics->refcnt);
                 }
   #ifdef CONFIG_IP_ROUTE_CLASSID
                 rt->dst.tclassid = nh->nh_tclassid;
@@@ -2236,7 -2236,7 +2236,7 @@@ add
         if (!rth)
                 return ERR_PTR(-ENOBUFS);
   
- -      rth->rt_iif     = orig_oif ? : 0;
+ +      rth->rt_iif = orig_oif;
         if (res->table)
                 rth->rt_table_id = res->table->tb_id;
   
@@@ -2439,12 -2439,6 +2439,12 @@@ struct rtable *ip_route_output_key_hash
                 /* L3 master device is the loopback for that domain */
                 dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
                         net->loopback_dev;
+ +
+ +              /* make sure orig_oif points to fib result device even
+ +               * though packet rx/tx happens over loopback or l3mdev
+ +               */
+ +              orig_oif = FIB_RES_OIF(*res);
+ +
                 fl4->flowi4_oif = dev_out->ifindex;
                 flags |= RTCF_LOCAL;
                 goto make_route;
@@@ -2769,14 -2763,21 +2769,21 @@@ static int inet_rtm_getroute(struct sk_
         if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
                 table_id = rt->rt_table_id;
   
-       if (rtm->rtm_flags & RTM_F_FIB_MATCH)
+       if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
+               if (!res.fi) {
+                       err = fib_props[res.type].error;
+                       if (!err)
+                               err = -EHOSTUNREACH;
+                       goto errout_free;
+               }
                 err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
                                     nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
                                     rt->rt_type, res.prefix, res.prefixlen,
                                     fl4.flowi4_tos, res.fi, 0);
-       else
+       } else {
                 err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
                                    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
+       }
         if (err < 0)
                 goto errout_free;
   
@@@ -3074,8 -3075,7 +3081,8 @@@ int __init ip_rt_init(void
         xfrm_init();
         xfrm4_init();
   #endif
- -      rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
+ +      rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
+ +                    RTNL_FLAG_DOIT_UNLOCKED);
   
   #ifdef CONFIG_SYSCTL
         register_pernet_subsys(&sysctl_route_ops);
diff --combined net/ipv4/tcp_input.c

index d73903fe8c83d68ee005c1bb9136d9ab008469fd,bab7f0493098c6521f445923d721d296f326f7e1..ddc854728a6011cc36d82ef0e226c0fbbbec339d
--- 1/net/ipv4/tcp_input.c
--- 2/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@@ -103,6 -103,7 +103,6 @@@ int sysctl_tcp_invalid_ratelimit __read
   #define FLAG_DATA_SACKED      0x20 /* New SACK.                               */
   #define FLAG_ECE              0x40 /* ECE in this ACK                         */
   #define FLAG_LOST_RETRANS     0x80 /* This ACK marks some retransmission lost */
- -#define FLAG_SLOWPATH         0x100 /* Do not skip RFC checks for window update.*/
   #define FLAG_ORIG_SACK_ACKED  0x200 /* Never retransmitted data are (s)acked  */
   #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
   #define FLAG_DSACKING_ACK     0x800 /* SACK blocks contained D-SACK info */
@@@ -1951,7 -1952,6 +1951,7 @@@ void tcp_enter_loss(struct sock *sk
             !after(tp->high_seq, tp->snd_una) ||
             (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
                 tp->prior_ssthresh = tcp_current_ssthresh(sk);
+ +              tp->prior_cwnd = tp->snd_cwnd;
                 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
                 tcp_ca_event(sk, CA_EVENT_LOSS);
                 tcp_init_undo(tp);
@@@ -3009,8 -3009,7 +3009,7 @@@ void tcp_rearm_rto(struct sock *sk
                         /* delta_us may not be positive if the socket is locked
                          * when the retrans timer fires and is rescheduled.
                          */
-                       if (delta_us > 0)
-                               rto = usecs_to_jiffies(delta_us);
+                       rto = usecs_to_jiffies(max_t(int, delta_us, 1));
                 }
                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
                                           TCP_RTO_MAX);
@@@ -3372,6 -3371,12 +3371,6 @@@ static int tcp_ack_update_window(struc
                 if (tp->snd_wnd != nwin) {
                         tp->snd_wnd = nwin;
   
- -                      /* Note, it is the only place, where
- -                       * fast path is recovered for sending TCP.
- -                       */
- -                      tp->pred_flags = 0;
- -                      tcp_fast_path_check(sk);
- -
                         if (tcp_send_head(sk))
                                 tcp_slow_start_after_idle_check(sk);
   
@@@ -3553,7 -3558,6 +3552,7 @@@ static int tcp_ack(struct sock *sk, con
         u32 lost = tp->lost;
         int acked = 0; /* Number of packets newly acked */
         int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+ +      u32 ack_ev_flags = 0;
   
         sack_state.first_sackt = 0;
         sack_state.rate = &rs;
@@@ -3594,26 -3598,42 +3593,26 @@@
         if (flag & FLAG_UPDATE_TS_RECENT)
                 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
   
- -      if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
- -              /* Window is constant, pure forward advance.
- -               * No more checks are required.
- -               * Note, we use the fact that SND.UNA>=SND.WL2.
- -               */
- -              tcp_update_wl(tp, ack_seq);
- -              tcp_snd_una_update(tp, ack);
- -              flag |= FLAG_WIN_UPDATE;
- -
- -              tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
- -
- -              NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
- -      } else {
- -              u32 ack_ev_flags = CA_ACK_SLOWPATH;
- -
- -              if (ack_seq != TCP_SKB_CB(skb)->end_seq)
- -                      flag |= FLAG_DATA;
- -              else
- -                      NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
+ +      if (ack_seq != TCP_SKB_CB(skb)->end_seq)
+ +              flag |= FLAG_DATA;
+ +      else
+ +              NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
   
- -              flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
+ +      flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
   
- -              if (TCP_SKB_CB(skb)->sacked)
- -                      flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
- -                                                      &sack_state);
+ +      if (TCP_SKB_CB(skb)->sacked)
+ +              flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+ +                                              &sack_state);
   
- -              if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
- -                      flag |= FLAG_ECE;
- -                      ack_ev_flags |= CA_ACK_ECE;
- -              }
+ +      if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
+ +              flag |= FLAG_ECE;
+ +              ack_ev_flags = CA_ACK_ECE;
+ +      }
   
- -              if (flag & FLAG_WIN_UPDATE)
- -                      ack_ev_flags |= CA_ACK_WIN_UPDATE;
+ +      if (flag & FLAG_WIN_UPDATE)
+ +              ack_ev_flags |= CA_ACK_WIN_UPDATE;
   
- -              tcp_in_ack_event(sk, ack_ev_flags);
- -      }
+ +      tcp_in_ack_event(sk, ack_ev_flags);
   
         /* We passed data and got it acked, remove any soft error
          * log. Something worked...
@@@ -4381,6 -4401,8 +4380,6 @@@ static void tcp_data_queue_ofo(struct s
                 return;
         }
   
- -      /* Disable header prediction. */
- -      tp->pred_flags = 0;
         inet_csk_schedule_ack(sk);
   
         NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
@@@ -4569,8 -4591,8 +4568,8 @@@ err
   static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
   {
         struct tcp_sock *tp = tcp_sk(sk);
- -      bool fragstolen = false;
- -      int eaten = -1;
+ +      bool fragstolen;
+ +      int eaten;
   
         if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
                 __kfree_skb(skb);
@@@ -4592,13 -4614,32 +4591,13 @@@
                         goto out_of_window;
   
                 /* Ok. In sequence. In window. */
- -              if (tp->ucopy.task == current &&
- -                  tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
- -                  sock_owned_by_user(sk) && !tp->urg_data) {
- -                      int chunk = min_t(unsigned int, skb->len,
- -                                        tp->ucopy.len);
- -
- -                      __set_current_state(TASK_RUNNING);
- -
- -                      if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
- -                              tp->ucopy.len -= chunk;
- -                              tp->copied_seq += chunk;
- -                              eaten = (chunk == skb->len);
- -                              tcp_rcv_space_adjust(sk);
- -                      }
- -              }
- -
- -              if (eaten <= 0) {
   queue_and_out:
- -                      if (eaten < 0) {
- -                              if (skb_queue_len(&sk->sk_receive_queue) == 0)
- -                                      sk_forced_mem_schedule(sk, skb->truesize);
- -                              else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
- -                                      goto drop;
- -                      }
- -                      eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
- -              }
+ +              if (skb_queue_len(&sk->sk_receive_queue) == 0)
+ +                      sk_forced_mem_schedule(sk, skb->truesize);
+ +              else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+ +                      goto drop;
+ +
+ +              eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
                 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                 if (skb->len)
                         tcp_event_data_recv(sk, skb);
@@@ -4618,6 -4659,8 +4617,6 @@@
                 if (tp->rx_opt.num_sacks)
                         tcp_sack_remove(tp);
   
- -              tcp_fast_path_check(sk);
- -
                 if (eaten > 0)
                         kfree_skb_partial(skb, fragstolen);
                 if (!sock_flag(sk, SOCK_DEAD))
@@@ -4943,6 -4986,7 +4942,6 @@@ static int tcp_prune_queue(struct sock 
         NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
   
         /* Massive buffer overcommit. */
- -      tp->pred_flags = 0;
         return -1;
   }
   
@@@ -5114,6 -5158,9 +5113,6 @@@ static void tcp_check_urg(struct sock *
   
         tp->urg_data = TCP_URG_NOTYET;
         tp->urg_seq = ptr;
- -
- -      /* Disable header prediction. */
- -      tp->pred_flags = 0;
   }
   
   /* This is the 'fast' part of urgent handling. */
@@@ -5142,6 -5189,26 +5141,6 @@@ static void tcp_urg(struct sock *sk, st
         }
   }
   
- -static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
- -{
- -      struct tcp_sock *tp = tcp_sk(sk);
- -      int chunk = skb->len - hlen;
- -      int err;
- -
- -      if (skb_csum_unnecessary(skb))
- -              err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
- -      else
- -              err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
- -
- -      if (!err) {
- -              tp->ucopy.len -= chunk;
- -              tp->copied_seq += chunk;
- -              tcp_rcv_space_adjust(sk);
- -      }
- -
- -      return err;
- -}
- -
   /* Accept RST for rcv_nxt - 1 after a FIN.
    * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
    * FIN is sent followed by a RST packet. The RST is sent with the same
@@@ -5272,29 -5339,201 +5271,29 @@@ discard
   
   /*
    *    TCP receive function for the ESTABLISHED state.
- - *
- - *    It is split into a fast path and a slow path. The fast path is
- - *    disabled when:
- - *    - A zero window was announced from us - zero window probing
- - *        is only handled properly in the slow path.
- - *    - Out of order segments arrived.
- - *    - Urgent data is expected.
- - *    - There is no buffer space left
- - *    - Unexpected TCP flags/window values/header lengths are received
- - *      (detected by checking the TCP header against pred_flags)
- - *    - Data is sent in both directions. Fast path only supports pure senders
- - *      or pure receivers (this means either the sequence number or the ack
- - *      value must stay constant)
- - *    - Unexpected TCP option.
- - *
- - *    When these conditions are not satisfied it drops into a standard
- - *    receive procedure patterned after RFC793 to handle all cases.
- - *    The first three cases are guaranteed by proper pred_flags setting,
- - *    the rest is checked inline. Fast processing is turned on in
- - *    tcp_data_queue when everything is OK.
    */
   void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- -                       const struct tcphdr *th, unsigned int len)
+ +                       const struct tcphdr *th)
   {
+ +      unsigned int len = skb->len;
         struct tcp_sock *tp = tcp_sk(sk);
   
         tcp_mstamp_refresh(tp);
         if (unlikely(!sk->sk_rx_dst))
                 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
- -      /*
- -       *      Header prediction.
- -       *      The code loosely follows the one in the famous
- -       *      "30 instruction TCP receive" Van Jacobson mail.
- -       *
- -       *      Van's trick is to deposit buffers into socket queue
- -       *      on a device interrupt, to call tcp_recv function
- -       *      on the receive process context and checksum and copy
- -       *      the buffer to user space. smart...
- -       *
- -       *      Our current scheme is not silly either but we take the
- -       *      extra cost of the net_bh soft interrupt processing...
- -       *      We do checksum and copy also but from device to kernel.
- -       */
   
         tp->rx_opt.saw_tstamp = 0;
   
- -      /*      pred_flags is 0xS?10 << 16 + snd_wnd
- -       *      if header_prediction is to be made
- -       *      'S' will always be tp->tcp_header_len >> 2
- -       *      '?' will be 0 for the fast path, otherwise pred_flags is 0 to
- -       *  turn it off (when there are holes in the receive
- -       *       space for instance)
- -       *      PSH flag is ignored.
- -       */
- -
- -      if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
- -          TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
- -          !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
- -              int tcp_header_len = tp->tcp_header_len;
- -
- -              /* Timestamp header prediction: tcp_header_len
- -               * is automatically equal to th->doff*4 due to pred_flags
- -               * match.
- -               */
- -
- -              /* Check timestamp */
- -              if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
- -                      /* No? Slow path! */
- -                      if (!tcp_parse_aligned_timestamp(tp, th))
- -                              goto slow_path;
- -
- -                      /* If PAWS failed, check it more carefully in slow path */
- -                      if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
- -                              goto slow_path;
- -
- -                      /* DO NOT update ts_recent here, if checksum fails
- -                       * and timestamp was corrupted part, it will result
- -                       * in a hung connection since we will drop all
- -                       * future packets due to the PAWS test.
- -                       */
- -              }
- -
- -              if (len <= tcp_header_len) {
- -                      /* Bulk data transfer: sender */
- -                      if (len == tcp_header_len) {
- -                              /* Predicted packet is in window by definition.
- -                               * seq == rcv_nxt and rcv_wup <= rcv_nxt.
- -                               * Hence, check seq<=rcv_wup reduces to:
- -                               */
- -                              if (tcp_header_len ==
- -                                  (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
- -                                  tp->rcv_nxt == tp->rcv_wup)
- -                                      tcp_store_ts_recent(tp);
- -
- -                              /* We know that such packets are checksummed
- -                               * on entry.
- -                               */
- -                              tcp_ack(sk, skb, 0);
- -                              __kfree_skb(skb);
- -                              tcp_data_snd_check(sk);
- -                              return;
- -                      } else { /* Header too small */
- -                              TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
- -                              goto discard;
- -                      }
- -              } else {
- -                      int eaten = 0;
- -                      bool fragstolen = false;
- -
- -                      if (tp->ucopy.task == current &&
- -                          tp->copied_seq == tp->rcv_nxt &&
- -                          len - tcp_header_len <= tp->ucopy.len &&
- -                          sock_owned_by_user(sk)) {
- -                              __set_current_state(TASK_RUNNING);
- -
- -                              if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
- -                                      /* Predicted packet is in window by definition.
- -                                       * seq == rcv_nxt and rcv_wup <= rcv_nxt.
- -                                       * Hence, check seq<=rcv_wup reduces to:
- -                                       */
- -                                      if (tcp_header_len ==
- -                                          (sizeof(struct tcphdr) +
- -                                           TCPOLEN_TSTAMP_ALIGNED) &&
- -                                          tp->rcv_nxt == tp->rcv_wup)
- -                                              tcp_store_ts_recent(tp);
- -
- -                                      tcp_rcv_rtt_measure_ts(sk, skb);
- -
- -                                      __skb_pull(skb, tcp_header_len);
- -                                      tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
- -                                      NET_INC_STATS(sock_net(sk),
- -                                                      LINUX_MIB_TCPHPHITSTOUSER);
- -                                      eaten = 1;
- -                              }
- -                      }
- -                      if (!eaten) {
- -                              if (tcp_checksum_complete(skb))
- -                                      goto csum_error;
- -
- -                              if ((int)skb->truesize > sk->sk_forward_alloc)
- -                                      goto step5;
- -
- -                              /* Predicted packet is in window by definition.
- -                               * seq == rcv_nxt and rcv_wup <= rcv_nxt.
- -                               * Hence, check seq<=rcv_wup reduces to:
- -                               */
- -                              if (tcp_header_len ==
- -                                  (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
- -                                  tp->rcv_nxt == tp->rcv_wup)
- -                                      tcp_store_ts_recent(tp);
- -
- -                              tcp_rcv_rtt_measure_ts(sk, skb);
- -
- -                              NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
- -
- -                              /* Bulk data transfer: receiver */
- -                              eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
- -                                                    &fragstolen);
- -                      }
- -
- -                      tcp_event_data_recv(sk, skb);
- -
- -                      if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
- -                              /* Well, only one small jumplet in fast path... */
- -                              tcp_ack(sk, skb, FLAG_DATA);
- -                              tcp_data_snd_check(sk);
- -                              if (!inet_csk_ack_scheduled(sk))
- -                                      goto no_ack;
- -                      }
- -
- -                      __tcp_ack_snd_check(sk, 0);
- -no_ack:
- -                      if (eaten)
- -                              kfree_skb_partial(skb, fragstolen);
- -                      sk->sk_data_ready(sk);
- -                      return;
- -              }
- -      }
- -
- -slow_path:
         if (len < (th->doff << 2) || tcp_checksum_complete(skb))
                 goto csum_error;
   
         if (!th->ack && !th->rst && !th->syn)
                 goto discard;
   
- -      /*
- -       *      Standard slow path.
- -       */
- -
         if (!tcp_validate_incoming(sk, skb, th, 1))
                 return;
   
- -step5:
- -      if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
+ +      if (tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT) < 0)
                 goto discard;
   
         tcp_rcv_rtt_measure_ts(sk, skb);
@@@ -5347,6 -5586,12 +5346,6 @@@ void tcp_finish_connect(struct sock *sk
   
         if (sock_flag(sk, SOCK_KEEPOPEN))
                 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
- -
- -      if (!tp->rx_opt.snd_wscale)
- -              __tcp_fast_path_on(tp, tp->snd_wnd);
- -      else
- -              tp->pred_flags = 0;
- -
   }
   
   static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
@@@ -5475,7 -5720,7 +5474,7 @@@ static int tcp_rcv_synsent_state_proces
                 tcp_ecn_rcv_synack(tp, th);
   
                 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
- -              tcp_ack(sk, skb, FLAG_SLOWPATH);
+ +              tcp_ack(sk, skb, 0);
   
                 /* Ok.. it's good. Set up sequence numbers and
                  * move to established.
@@@ -5711,8 -5956,8 +5710,8 @@@ int tcp_rcv_state_process(struct sock *
                 return 0;
   
         /* step 5: check the ACK field */
- -      acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
- -                                    FLAG_UPDATE_TS_RECENT |
+ +
+ +      acceptable = tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT |
                                       FLAG_NO_CHALLENGE_ACK) > 0;
   
         if (!acceptable) {
@@@ -5780,6 -6025,7 +5779,6 @@@
                 tp->lsndtime = tcp_jiffies32;
   
                 tcp_initialize_rcv_mss(sk);
- -              tcp_fast_path_on(tp);
                 break;
   
         case TCP_FIN_WAIT1: {
diff --combined net/ipv4/udp.c

index cb633884e8259ad6620686ce773cb8ee6e9266de,cd1d044a7fa580f315af0fd81eb1bf425fd1f38c..25fb14490d6a9d47811f1cef17095b8cb1c27332
--- 1/net/ipv4/udp.c
--- 2/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@@ -380,8 -380,8 +380,8 @@@ int udp_v4_get_port(struct sock *sk, un
   
   static int compute_score(struct sock *sk, struct net *net,
                          __be32 saddr, __be16 sport,
- -                       __be32 daddr, unsigned short hnum, int dif,
- -                       bool exact_dif)
+ +                       __be32 daddr, unsigned short hnum,
+ +                       int dif, int sdif, bool exact_dif)
   {
         int score;
         struct inet_sock *inet;
@@@ -413,15 -413,10 +413,15 @@@
         }
   
         if (sk->sk_bound_dev_if || exact_dif) {
- -              if (sk->sk_bound_dev_if != dif)
+ +              bool dev_match = (sk->sk_bound_dev_if == dif ||
+ +                                sk->sk_bound_dev_if == sdif);
+ +
+ +              if (exact_dif && !dev_match)
                         return -1;
- -              score += 4;
+ +              if (sk->sk_bound_dev_if && dev_match)
+ +                      score += 4;
         }
+ +
         if (sk->sk_incoming_cpu == raw_smp_processor_id())
                 score++;
         return score;
@@@ -441,11 -436,10 +441,11 @@@ static u32 udp_ehashfn(const struct ne
   
   /* called with rcu_read_lock() */
   static struct sock *udp4_lib_lookup2(struct net *net,
- -              __be32 saddr, __be16 sport,
- -              __be32 daddr, unsigned int hnum, int dif, bool exact_dif,
- -              struct udp_hslot *hslot2,
- -              struct sk_buff *skb)
+ +                                   __be32 saddr, __be16 sport,
+ +                                   __be32 daddr, unsigned int hnum,
+ +                                   int dif, int sdif, bool exact_dif,
+ +                                   struct udp_hslot *hslot2,
+ +                                   struct sk_buff *skb)
   {
         struct sock *sk, *result;
         int score, badness, matches = 0, reuseport = 0;
@@@ -455,7 -449,7 +455,7 @@@
         badness = 0;
         udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
                 score = compute_score(sk, net, saddr, sport,
- -                                    daddr, hnum, dif, exact_dif);
+ +                                    daddr, hnum, dif, sdif, exact_dif);
                 if (score > badness) {
                         reuseport = sk->sk_reuseport;
                         if (reuseport) {
@@@ -483,8 -477,8 +483,8 @@@
    * harder than this. -DaveM
    */
   struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
- -              __be16 sport, __be32 daddr, __be16 dport,
- -              int dif, struct udp_table *udptable, struct sk_buff *skb)
+ +              __be16 sport, __be32 daddr, __be16 dport, int dif,
+ +              int sdif, struct udp_table *udptable, struct sk_buff *skb)
   {
         struct sock *sk, *result;
         unsigned short hnum = ntohs(dport);
@@@ -502,7 -496,7 +502,7 @@@
                         goto begin;
   
                 result = udp4_lib_lookup2(net, saddr, sport,
- -                                        daddr, hnum, dif,
+ +                                        daddr, hnum, dif, sdif,
                                           exact_dif, hslot2, skb);
                 if (!result) {
                         unsigned int old_slot2 = slot2;
@@@ -517,7 -511,7 +517,7 @@@
                                 goto begin;
   
                         result = udp4_lib_lookup2(net, saddr, sport,
- -                                                daddr, hnum, dif,
+ +                                                daddr, hnum, dif, sdif,
                                                   exact_dif, hslot2, skb);
                 }
                 return result;
@@@ -527,7 -521,7 +527,7 @@@ begin
         badness = 0;
         sk_for_each_rcu(sk, &hslot->head) {
                 score = compute_score(sk, net, saddr, sport,
- -                                    daddr, hnum, dif, exact_dif);
+ +                                    daddr, hnum, dif, sdif, exact_dif);
                 if (score > badness) {
                         reuseport = sk->sk_reuseport;
                         if (reuseport) {
@@@ -560,7 -554,7 +560,7 @@@ static inline struct sock *__udp4_lib_l
   
         return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
                                  iph->daddr, dport, inet_iif(skb),
- -                               udptable, skb);
+ +                               inet_sdif(skb), udptable, skb);
   }
   
   struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
@@@ -582,7 -576,7 +582,7 @@@ struct sock *udp4_lib_lookup(struct ne
         struct sock *sk;
   
         sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
- -                             dif, &udp_table, NULL);
+ +                             dif, 0, &udp_table, NULL);
         if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                 sk = NULL;
         return sk;
@@@ -593,7 -587,7 +593,7 @@@ EXPORT_SYMBOL_GPL(udp4_lib_lookup)
   static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
                                        __be16 loc_port, __be32 loc_addr,
                                        __be16 rmt_port, __be32 rmt_addr,
- -                                     int dif, unsigned short hnum)
+ +                                     int dif, int sdif, unsigned short hnum)
   {
         struct inet_sock *inet = inet_sk(sk);
   
@@@ -603,10 -597,9 +603,10 @@@
             (inet->inet_dport != rmt_port && inet->inet_dport) ||
             (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
             ipv6_only_sock(sk) ||
- -          (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ +          (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ +           sk->sk_bound_dev_if != sdif))
                 return false;
- -      if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif))
+ +      if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
                 return false;
         return true;
   }
@@@ -635,8 -628,8 +635,8 @@@ void __udp4_lib_err(struct sk_buff *skb
         struct net *net = dev_net(skb->dev);
   
         sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
- -                      iph->saddr, uh->source, skb->dev->ifindex, udptable,
- -                      NULL);
+ +                             iph->saddr, uh->source, skb->dev->ifindex, 0,
+ +                             udptable, NULL);
         if (!sk) {
                 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
                 return; /* No socket for error */
@@@ -1183,11 -1176,7 +1183,11 @@@ static void udp_set_dev_scratch(struct 
         scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
         scratch->is_linear = !skb_is_nonlinear(skb);
   #endif
- -      if (likely(!skb->_skb_refdst))
+ +      /* all head states except sp (dst, sk, nf) are always cleared by
+ +       * udp_rcv() and we need to preserve secpath, if present, to eventually
+ +       * process IP_CMSG_PASSSEC at recvmsg() time
+ +       */
+ +      if (likely(!skb_sec_path(skb)))
                 scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
   }
   
@@@ -1585,7 -1574,8 +1585,8 @@@ int udp_recvmsg(struct sock *sk, struc
                 return ip_recv_error(sk, msg, len, addr_len);
   
   try_again:
-       peeking = off = sk_peek_offset(sk, flags);
+       peeking = flags & MSG_PEEK;
+       off = sk_peek_offset(sk, flags);
         skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
         if (!skb)
                 return err;
@@@ -1793,6 -1783,13 +1794,6 @@@ static int __udp_queue_rcv_skb(struct s
                 sk_mark_napi_id_once(sk, skb);
         }
   
- -      /* At recvmsg() time we may access skb->dst or skb->sp depending on
- -       * the IP options and the cmsg flags, elsewhere can we clear all
- -       * pending head states while they are hot in the cache
- -       */
- -      if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb)))
- -              skb_release_head_state(skb);
- -
         rc = __udp_enqueue_schedule_skb(sk, skb);
         if (rc < 0) {
                 int is_udplite = IS_UDPLITE(sk);
@@@ -1960,7 -1957,6 +1961,7 @@@ static int __udp4_lib_mcast_deliver(str
         unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
         unsigned int offset = offsetof(typeof(*sk), sk_node);
         int dif = skb->dev->ifindex;
+ +      int sdif = inet_sdif(skb);
         struct hlist_node *node;
         struct sk_buff *nskb;
   
@@@ -1975,7 -1971,7 +1976,7 @@@ start_lookup
   
         sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
                 if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
- -                                       uh->source, saddr, dif, hnum))
+ +                                       uh->source, saddr, dif, sdif, hnum))
                         continue;
   
                 if (!first) {
@@@ -2165,7 -2161,7 +2166,7 @@@ drop
   static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
                                                   __be16 loc_port, __be32 loc_addr,
                                                   __be16 rmt_port, __be32 rmt_addr,
- -                                                int dif)
+ +                                                int dif, int sdif)
   {
         struct sock *sk, *result;
         unsigned short hnum = ntohs(loc_port);
@@@ -2179,7 -2175,7 +2180,7 @@@
         result = NULL;
         sk_for_each_rcu(sk, &hslot->head) {
                 if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
- -                                      rmt_port, rmt_addr, dif, hnum)) {
+ +                                      rmt_port, rmt_addr, dif, sdif, hnum)) {
                         if (result)
                                 return NULL;
                         result = sk;
@@@ -2196,7 -2192,7 +2197,7 @@@
   static struct sock *__udp4_lib_demux_lookup(struct net *net,
                                             __be16 loc_port, __be32 loc_addr,
                                             __be16 rmt_port, __be32 rmt_addr,
- -                                          int dif)
+ +                                          int dif, int sdif)
   {
         unsigned short hnum = ntohs(loc_port);
         unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
@@@ -2208,7 -2204,7 +2209,7 @@@
   
         udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
                 if (INET_MATCH(sk, net, acookie, rmt_addr,
- -                             loc_addr, ports, dif))
+ +                             loc_addr, ports, dif, sdif))
                         return sk;
                 /* Only check first socket in chain */
                 break;
@@@ -2224,7 -2220,6 +2225,7 @@@ void udp_v4_early_demux(struct sk_buff 
         struct sock *sk = NULL;
         struct dst_entry *dst;
         int dif = skb->dev->ifindex;
+ +      int sdif = inet_sdif(skb);
         int ours;
   
         /* validate the packet */
@@@ -2250,11 -2245,10 +2251,11 @@@
                 }
   
                 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
- -                                                 uh->source, iph->saddr, dif);
+ +                                                 uh->source, iph->saddr,
+ +                                                 dif, sdif);
         } else if (skb->pkt_type == PACKET_HOST) {
                 sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
- -                                           uh->source, iph->saddr, dif);
+ +                                           uh->source, iph->saddr, dif, sdif);
         }
   
         if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
diff --combined net/ipv6/ip6_fib.c

index 8c58c7558de003bb47e9c50c7f4fcfc0b7251bd5,5cc0ea0381981b0539d5d6e67401d962a6f6a230..549aacc3cb2c6f803a19d97e295ceac56ce6ef44
--- 1/net/ipv6/ip6_fib.c
--- 2/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@@ -33,7 -33,6 +33,7 @@@
   #include <net/ndisc.h>
   #include <net/addrconf.h>
   #include <net/lwtunnel.h>
+ +#include <net/fib_notifier.h>
   
   #include <net/ip6_fib.h>
   #include <net/ip6_route.h>
@@@ -154,7 -153,7 +154,7 @@@ static void node_free(struct fib6_node 
         kmem_cache_free(fib6_node_kmem, fn);
   }
   
- -static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+ +void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
   {
         int cpu;
   
@@@ -177,7 -176,15 +177,7 @@@
         free_percpu(non_pcpu_rt->rt6i_pcpu);
         non_pcpu_rt->rt6i_pcpu = NULL;
   }
- -
- -static void rt6_release(struct rt6_info *rt)
- -{
- -      if (atomic_dec_and_test(&rt->rt6i_ref)) {
- -              rt6_free_pcpu(rt);
- -              dst_dev_put(&rt->dst);
- -              dst_release(&rt->dst);
- -      }
- -}
+ +EXPORT_SYMBOL_GPL(rt6_free_pcpu);
   
   static void fib6_link_table(struct net *net, struct fib6_table *tb)
   {
@@@ -295,109 -302,6 +295,109 @@@ static void __net_init fib6_tables_init
   
   #endif
   
+ +unsigned int fib6_tables_seq_read(struct net *net)
+ +{
+ +      unsigned int h, fib_seq = 0;
+ +
+ +      rcu_read_lock();
+ +      for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ +              struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ +              struct fib6_table *tb;
+ +
+ +              hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+ +                      read_lock_bh(&tb->tb6_lock);
+ +                      fib_seq += tb->fib_seq;
+ +                      read_unlock_bh(&tb->tb6_lock);
+ +              }
+ +      }
+ +      rcu_read_unlock();
+ +
+ +      return fib_seq;
+ +}
+ +
+ +static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+ +                                  enum fib_event_type event_type,
+ +                                  struct rt6_info *rt)
+ +{
+ +      struct fib6_entry_notifier_info info = {
+ +              .rt = rt,
+ +      };
+ +
+ +      return call_fib6_notifier(nb, net, event_type, &info.info);
+ +}
+ +
+ +static int call_fib6_entry_notifiers(struct net *net,
+ +                                   enum fib_event_type event_type,
+ +                                   struct rt6_info *rt)
+ +{
+ +      struct fib6_entry_notifier_info info = {
+ +              .rt = rt,
+ +      };
+ +
+ +      rt->rt6i_table->fib_seq++;
+ +      return call_fib6_notifiers(net, event_type, &info.info);
+ +}
+ +
+ +struct fib6_dump_arg {
+ +      struct net *net;
+ +      struct notifier_block *nb;
+ +};
+ +
+ +static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+ +{
+ +      if (rt == arg->net->ipv6.ip6_null_entry)
+ +              return;
+ +      call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+ +}
+ +
+ +static int fib6_node_dump(struct fib6_walker *w)
+ +{
+ +      struct rt6_info *rt;
+ +
+ +      for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
+ +              fib6_rt_dump(rt, w->args);
+ +      w->leaf = NULL;
+ +      return 0;
+ +}
+ +
+ +static void fib6_table_dump(struct net *net, struct fib6_table *tb,
+ +                          struct fib6_walker *w)
+ +{
+ +      w->root = &tb->tb6_root;
+ +      read_lock_bh(&tb->tb6_lock);
+ +      fib6_walk(net, w);
+ +      read_unlock_bh(&tb->tb6_lock);
+ +}
+ +
+ +/* Called with rcu_read_lock() */
+ +int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+ +{
+ +      struct fib6_dump_arg arg;
+ +      struct fib6_walker *w;
+ +      unsigned int h;
+ +
+ +      w = kzalloc(sizeof(*w), GFP_ATOMIC);
+ +      if (!w)
+ +              return -ENOMEM;
+ +
+ +      w->func = fib6_node_dump;
+ +      arg.net = net;
+ +      arg.nb = nb;
+ +      w->args = &arg;
+ +
+ +      for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ +              struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ +              struct fib6_table *tb;
+ +
+ +              hlist_for_each_entry_rcu(tb, head, tb6_hlist)
+ +                      fib6_table_dump(net, tb, w);
+ +      }
+ +
+ +      kfree(w);
+ +
+ +      return 0;
+ +}
+ +
   static int fib6_dump_node(struct fib6_walker *w)
   {
         int res;
@@@ -829,6 -733,8 +829,6 @@@ static void fib6_purge_rt(struct rt6_in
                         }
                         fn = fn->parent;
                 }
- -              /* No more references are possible at this point. */
- -              BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
         }
   }
   
@@@ -973,8 -879,6 +973,8 @@@ add
                 *ins = rt;
                 rt->rt6i_node = fn;
                 atomic_inc(&rt->rt6i_ref);
+ +              call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
+ +                                        rt);
                 if (!info->skip_notify)
                         inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
                 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@@ -1002,8 -906,6 +1002,8 @@@
                 rt->rt6i_node = fn;
                 rt->dst.rt6_next = iter->dst.rt6_next;
                 atomic_inc(&rt->rt6i_ref);
+ +              call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
+ +                                        rt);
                 if (!info->skip_notify)
                         inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
                 if (!(fn->fn_flags & RTN_RTINFO)) {
@@@ -1011,8 -913,9 +1011,10 @@@
                         fn->fn_flags |= RTN_RTINFO;
                 }
                 nsiblings = iter->rt6i_nsiblings;
+ +              iter->rt6i_node = NULL;
                 fib6_purge_rt(iter, fn, info->nl_net);
+               if (fn->rr_ptr == iter)
+                       fn->rr_ptr = NULL;
                 rt6_release(iter);
   
                 if (nsiblings) {
@@@ -1024,8 -927,9 +1026,10 @@@
                                         break;
                                 if (rt6_qualify_for_ecmp(iter)) {
                                         *ins = iter->dst.rt6_next;
+ +                                      iter->rt6i_node = NULL;
                                         fib6_purge_rt(iter, fn, info->nl_net);
+                                       if (fn->rr_ptr == iter)
+                                               fn->rr_ptr = NULL;
                                         rt6_release(iter);
                                         nsiblings--;
                                 } else {
@@@ -1114,7 -1018,7 +1118,7 @@@ int fib6_add(struct fib6_node *root, st
                         /* Create subtree root node */
                         sfn = node_alloc();
                         if (!sfn)
-                               goto st_failure;
+                               goto failure;
   
                         sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
                         atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
@@@ -1131,12 -1035,12 +1135,12 @@@
   
                         if (IS_ERR(sn)) {
                                 /* If it is failed, discard just allocated
-                                  root, and then (in st_failure) stale node
+                                  root, and then (in failure) stale node
                                    in main tree.
                                  */
                                 node_free(sfn);
                                 err = PTR_ERR(sn);
-                               goto st_failure;
+                               goto failure;
                         }
   
                         /* Now link new subtree to main tree */
@@@ -1151,7 -1055,7 +1155,7 @@@
   
                         if (IS_ERR(sn)) {
                                 err = PTR_ERR(sn);
-                               goto st_failure;
+                               goto failure;
                         }
                 }
   
@@@ -1192,18 -1096,17 +1196,17 @@@ out
                         atomic_inc(&pn->leaf->rt6i_ref);
                 }
   #endif
-               /* Always release dst as dst->__refcnt is guaranteed
-                * to be taken before entering this function
-                */
-               dst_release_immediate(&rt->dst);
+               goto failure;
         }
         return err;
   
- #ifdef CONFIG_IPV6_SUBTREES
-       /* Subtree creation failed, probably main tree node
-          is orphan. If it is, shoot it.
+ failure:
+       /* fn->leaf could be NULL if fn is an intermediate node and we
+        * failed to add the new route to it in both subtree creation
+        * failure and fib6_add_rt2node() failure case.
+        * In both cases, fib6_repair_tree() should be called to fix
+        * fn->leaf.
          */
- st_failure:
         if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
                 fib6_repair_tree(info->nl_net, fn);
         /* Always release dst as dst->__refcnt is guaranteed
@@@ -1211,7 -1114,6 +1214,6 @@@
          */
         dst_release_immediate(&rt->dst);
         return err;
- #endif
   }
   
   /*
@@@ -1559,7 -1461,6 +1561,7 @@@ static void fib6_del_route(struct fib6_
   
         fib6_purge_rt(rt, fn, net);
   
+ +      call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
         if (!info->skip_notify)
                 inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
         rt6_release(rt);
@@@ -1940,11 -1841,6 +1942,11 @@@ static void fib6_gc_timer_cb(unsigned l
   static int __net_init fib6_net_init(struct net *net)
   {
         size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+ +      int err;
+ +
+ +      err = fib6_notifier_init(net);
+ +      if (err)
+ +              return err;
   
         spin_lock_init(&net->ipv6.fib6_gc_lock);
         rwlock_init(&net->ipv6.fib6_walker_lock);
@@@ -1997,7 -1893,6 +1999,7 @@@ out_fib_table_hash
   out_rt6_stats:
         kfree(net->ipv6.rt6_stats);
   out_timer:
+ +      fib6_notifier_exit(net);
         return -ENOMEM;
   }
   
@@@ -2014,7 -1909,6 +2016,7 @@@ static void fib6_net_exit(struct net *n
         kfree(net->ipv6.fib6_main_tbl);
         kfree(net->ipv6.fib_table_hash);
         kfree(net->ipv6.rt6_stats);
+ +      fib6_notifier_exit(net);
   }
   
   static struct pernet_operations fib6_net_ops = {
@@@ -2038,7 -1932,7 +2040,7 @@@ int __init fib6_init(void
                 goto out_kmem_cache_create;
   
         ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
- -                            NULL);
+ +                            0);
         if (ret)
                 goto out_unregister_subsys;
   
diff --combined net/ipv6/udp.c

index 19afcaf4a22e036a8768fd1c4b4f870c8fa47d37,20039c8501eb9729619f4337a2757a2954163614..2a15f1bb6ef8643a07997b89f71f034231ea653c
--- 1/net/ipv6/udp.c
--- 2/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@@ -129,7 -129,7 +129,7 @@@ static void udp_v6_rehash(struct sock *
   static int compute_score(struct sock *sk, struct net *net,
                          const struct in6_addr *saddr, __be16 sport,
                          const struct in6_addr *daddr, unsigned short hnum,
- -                       int dif, bool exact_dif)
+ +                       int dif, int sdif, bool exact_dif)
   {
         int score;
         struct inet_sock *inet;
@@@ -161,13 -161,9 +161,13 @@@
         }
   
         if (sk->sk_bound_dev_if || exact_dif) {
- -              if (sk->sk_bound_dev_if != dif)
+ +              bool dev_match = (sk->sk_bound_dev_if == dif ||
+ +                                sk->sk_bound_dev_if == sdif);
+ +
+ +              if (exact_dif && !dev_match)
                         return -1;
- -              score++;
+ +              if (sk->sk_bound_dev_if && dev_match)
+ +                      score++;
         }
   
         if (sk->sk_incoming_cpu == raw_smp_processor_id())
@@@ -179,9 -175,9 +179,9 @@@
   /* called with rcu_read_lock() */
   static struct sock *udp6_lib_lookup2(struct net *net,
                 const struct in6_addr *saddr, __be16 sport,
- -              const struct in6_addr *daddr, unsigned int hnum, int dif,
- -              bool exact_dif, struct udp_hslot *hslot2,
- -              struct sk_buff *skb)
+ +              const struct in6_addr *daddr, unsigned int hnum,
+ +              int dif, int sdif, bool exact_dif,
+ +              struct udp_hslot *hslot2, struct sk_buff *skb)
   {
         struct sock *sk, *result;
         int score, badness, matches = 0, reuseport = 0;
@@@ -191,7 -187,7 +191,7 @@@
         badness = -1;
         udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
                 score = compute_score(sk, net, saddr, sport,
- -                                    daddr, hnum, dif, exact_dif);
+ +                                    daddr, hnum, dif, sdif, exact_dif);
                 if (score > badness) {
                         reuseport = sk->sk_reuseport;
                         if (reuseport) {
@@@ -218,10 -214,10 +218,10 @@@
   
   /* rcu_read_lock() must be held */
   struct sock *__udp6_lib_lookup(struct net *net,
- -                                    const struct in6_addr *saddr, __be16 sport,
- -                                    const struct in6_addr *daddr, __be16 dport,
- -                                    int dif, struct udp_table *udptable,
- -                                    struct sk_buff *skb)
+ +                             const struct in6_addr *saddr, __be16 sport,
+ +                             const struct in6_addr *daddr, __be16 dport,
+ +                             int dif, int sdif, struct udp_table *udptable,
+ +                             struct sk_buff *skb)
   {
         struct sock *sk, *result;
         unsigned short hnum = ntohs(dport);
@@@ -239,7 -235,7 +239,7 @@@
                         goto begin;
   
                 result = udp6_lib_lookup2(net, saddr, sport,
- -                                        daddr, hnum, dif, exact_dif,
+ +                                        daddr, hnum, dif, sdif, exact_dif,
                                           hslot2, skb);
                 if (!result) {
                         unsigned int old_slot2 = slot2;
@@@ -254,7 -250,7 +254,7 @@@
                                 goto begin;
   
                         result = udp6_lib_lookup2(net, saddr, sport,
- -                                                daddr, hnum, dif,
+ +                                                daddr, hnum, dif, sdif,
                                                   exact_dif, hslot2,
                                                   skb);
                 }
@@@ -265,7 -261,7 +265,7 @@@ begin
         badness = -1;
         sk_for_each_rcu(sk, &hslot->head) {
                 score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
- -                                    exact_dif);
+ +                                    sdif, exact_dif);
                 if (score > badness) {
                         reuseport = sk->sk_reuseport;
                         if (reuseport) {
@@@ -298,7 -294,7 +298,7 @@@ static struct sock *__udp6_lib_lookup_s
   
         return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
                                  &iph->daddr, dport, inet6_iif(skb),
- -                               udptable, skb);
+ +                               inet6_sdif(skb), udptable, skb);
   }
   
   struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
@@@ -308,7 -304,7 +308,7 @@@
   
         return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
                                  &iph->daddr, dport, inet6_iif(skb),
- -                               &udp_table, skb);
+ +                               inet6_sdif(skb), &udp_table, skb);
   }
   EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
   
@@@ -324,7 -320,7 +324,7 @@@ struct sock *udp6_lib_lookup(struct ne
         struct sock *sk;
   
         sk =  __udp6_lib_lookup(net, saddr, sport, daddr, dport,
- -                              dif, &udp_table, NULL);
+ +                              dif, 0, &udp_table, NULL);
         if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                 sk = NULL;
         return sk;
@@@ -366,7 -362,8 +366,8 @@@ int udpv6_recvmsg(struct sock *sk, stru
                 return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
   
   try_again:
-       peeking = off = sk_peek_offset(sk, flags);
+       peeking = flags & MSG_PEEK;
+       off = sk_peek_offset(sk, flags);
         skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
         if (!skb)
                 return err;
@@@ -505,7 -502,7 +506,7 @@@ void __udp6_lib_err(struct sk_buff *skb
         struct net *net = dev_net(skb->dev);
   
         sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
- -                             inet6_iif(skb), udptable, skb);
+ +                             inet6_iif(skb), 0, udptable, skb);
         if (!sk) {
                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                   ICMP6_MIB_INERRORS);
@@@ -897,7 -894,7 +898,7 @@@ discard
   static struct sock *__udp6_lib_demux_lookup(struct net *net,
                         __be16 loc_port, const struct in6_addr *loc_addr,
                         __be16 rmt_port, const struct in6_addr *rmt_addr,
- -                      int dif)
+ +                      int dif, int sdif)
   {
         unsigned short hnum = ntohs(loc_port);
         unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
@@@ -908,7 -905,7 +909,7 @@@
   
         udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
                 if (sk->sk_state == TCP_ESTABLISHED &&
- -                  INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+ +                  INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif))
                         return sk;
                 /* Only check first socket in chain */
                 break;
@@@ -923,7 -920,6 +924,7 @@@ static void udp_v6_early_demux(struct s
         struct sock *sk;
         struct dst_entry *dst;
         int dif = skb->dev->ifindex;
+ +      int sdif = inet6_sdif(skb);
   
         if (!pskb_may_pull(skb, skb_transport_offset(skb) +
             sizeof(struct udphdr)))
@@@ -935,7 -931,7 +936,7 @@@
                 sk = __udp6_lib_demux_lookup(net, uh->dest,
                                              &ipv6_hdr(skb)->daddr,
                                              uh->source, &ipv6_hdr(skb)->saddr,
- -                                           dif);
+ +                                           dif, sdif);
         else
                 return;
   
diff --combined net/openvswitch/datapath.c

index f6e229b51dfb39acdb0e8458062fd50db58eb9f2,6b44fe4052825a87b373bafb58ca014e3ec99015..76cf273a56c791bbc84262811c2f35e94e024dcf
--- 1/net/openvswitch/datapath.c
--- 2/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@@ -335,6 -335,8 +335,6 @@@ static int queue_gso_packets(struct dat
                              const struct dp_upcall_info *upcall_info,
                                  uint32_t cutlen)
   {
- -      unsigned short gso_type = skb_shinfo(skb)->gso_type;
- -      struct sw_flow_key later_key;
         struct sk_buff *segs, *nskb;
         int err;
   
@@@ -345,9 -347,21 +345,9 @@@
         if (segs == NULL)
                 return -EINVAL;
   
- -      if (gso_type & SKB_GSO_UDP) {
- -              /* The initial flow key extracted by ovs_flow_key_extract()
- -               * in this case is for a first fragment, so we need to
- -               * properly mark later fragments.
- -               */
- -              later_key = *key;
- -              later_key.ip.frag = OVS_FRAG_TYPE_LATER;
- -      }
- -
         /* Queue all of the segments. */
         skb = segs;
         do {
- -              if (gso_type & SKB_GSO_UDP && skb != segs)
- -                      key = &later_key;
- -
                 err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
                 if (err)
                         break;
@@@ -367,7 -381,7 +367,7 @@@
   }
   
   static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
-                             unsigned int hdrlen)
+                             unsigned int hdrlen, int actions_attrlen)
   {
         size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
@@@ -384,7 -398,7 +384,7 @@@
   
         /* OVS_PACKET_ATTR_ACTIONS */
         if (upcall_info->actions_len)
-               size += nla_total_size(upcall_info->actions_len);
+               size += nla_total_size(actions_attrlen);
   
         /* OVS_PACKET_ATTR_MRU */
         if (upcall_info->mru)
@@@ -451,7 -465,8 +451,8 @@@ static int queue_userspace_packet(struc
         else
                 hlen = skb->len;
   
-       len = upcall_msg_size(upcall_info, hlen - cutlen);
+       len = upcall_msg_size(upcall_info, hlen - cutlen,
+                             OVS_CB(skb)->acts_origlen);
         user_skb = genlmsg_new(len, GFP_ATOMIC);
         if (!user_skb) {
                 err = -ENOMEM;
diff --combined net/sched/cls_api.c

index ebeeb87e6d44da32b998d517f17586ced8790005,9fd44c22134783edf3db4f62ff1e8184c455cbd7..eef6b077f30ed69d2dc04bea61a851dc449cafee
--- 1/net/sched/cls_api.c
--- 2/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@@ -100,6 -100,21 +100,6 @@@ int unregister_tcf_proto_ops(struct tcf
   }
   EXPORT_SYMBOL(unregister_tcf_proto_ops);
   
- -static int tfilter_notify(struct net *net, struct sk_buff *oskb,
- -                        struct nlmsghdr *n, struct tcf_proto *tp,
- -                        unsigned long fh, int event, bool unicast);
- -
- -static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
- -                               struct nlmsghdr *n,
- -                               struct tcf_chain *chain, int event)
- -{
- -      struct tcf_proto *tp;
- -
- -      for (tp = rtnl_dereference(chain->filter_chain);
- -           tp; tp = rtnl_dereference(tp->next))
- -              tfilter_notify(net, oskb, n, tp, 0, event, false);
- -}
- -
   /* Select new prio value from the range, managed by kernel. */
   
   static inline u32 tcf_auto_prio(struct tcf_proto *tp)
@@@ -190,7 -205,7 +190,7 @@@ static void tcf_chain_flush(struct tcf_
   {
         struct tcf_proto *tp;
   
-       if (*chain->p_filter_chain)
+       if (chain->p_filter_chain)
                 RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
         while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
                 RCU_INIT_POINTER(chain->filter_chain, tp->next);
@@@ -392,109 -407,6 +392,109 @@@ static struct tcf_proto *tcf_chain_tp_f
         return tp;
   }
   
+ +static int tcf_fill_node(struct net *net, struct sk_buff *skb,
+ +                       struct tcf_proto *tp, void *fh, u32 portid,
+ +                       u32 seq, u16 flags, int event)
+ +{
+ +      struct tcmsg *tcm;
+ +      struct nlmsghdr  *nlh;
+ +      unsigned char *b = skb_tail_pointer(skb);
+ +
+ +      nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+ +      if (!nlh)
+ +              goto out_nlmsg_trim;
+ +      tcm = nlmsg_data(nlh);
+ +      tcm->tcm_family = AF_UNSPEC;
+ +      tcm->tcm__pad1 = 0;
+ +      tcm->tcm__pad2 = 0;
+ +      tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
+ +      tcm->tcm_parent = tp->classid;
+ +      tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
+ +      if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
+ +              goto nla_put_failure;
+ +      if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
+ +              goto nla_put_failure;
+ +      if (!fh) {
+ +              tcm->tcm_handle = 0;
+ +      } else {
+ +              if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
+ +                      goto nla_put_failure;
+ +      }
+ +      nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ +      return skb->len;
+ +
+ +out_nlmsg_trim:
+ +nla_put_failure:
+ +      nlmsg_trim(skb, b);
+ +      return -1;
+ +}
+ +
+ +static int tfilter_notify(struct net *net, struct sk_buff *oskb,
+ +                        struct nlmsghdr *n, struct tcf_proto *tp,
+ +                        void *fh, int event, bool unicast)
+ +{
+ +      struct sk_buff *skb;
+ +      u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ +
+ +      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ +      if (!skb)
+ +              return -ENOBUFS;
+ +
+ +      if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
+ +                        n->nlmsg_flags, event) <= 0) {
+ +              kfree_skb(skb);
+ +              return -EINVAL;
+ +      }
+ +
+ +      if (unicast)
+ +              return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ +
+ +      return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ +                            n->nlmsg_flags & NLM_F_ECHO);
+ +}
+ +
+ +static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
+ +                            struct nlmsghdr *n, struct tcf_proto *tp,
+ +                            void *fh, bool unicast, bool *last)
+ +{
+ +      struct sk_buff *skb;
+ +      u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ +      int err;
+ +
+ +      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ +      if (!skb)
+ +              return -ENOBUFS;
+ +
+ +      if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
+ +                        n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+ +              kfree_skb(skb);
+ +              return -EINVAL;
+ +      }
+ +
+ +      err = tp->ops->delete(tp, fh, last);
+ +      if (err) {
+ +              kfree_skb(skb);
+ +              return err;
+ +      }
+ +
+ +      if (unicast)
+ +              return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ +
+ +      return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ +                            n->nlmsg_flags & NLM_F_ECHO);
+ +}
+ +
+ +static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
+ +                               struct nlmsghdr *n,
+ +                               struct tcf_chain *chain, int event)
+ +{
+ +      struct tcf_proto *tp;
+ +
+ +      for (tp = rtnl_dereference(chain->filter_chain);
+ +           tp; tp = rtnl_dereference(tp->next))
+ +              tfilter_notify(net, oskb, n, tp, 0, event, false);
+ +}
+ +
   /* Add/change/delete/get a filter node */
   
   static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@@ -516,7 -428,7 +516,7 @@@
         struct tcf_proto *tp;
         const struct Qdisc_class_ops *cops;
         unsigned long cl;
- -      unsigned long fh;
+ +      void *fh;
         int err;
         int tp_created;
   
@@@ -655,7 -567,7 +655,7 @@@ replay
   
         fh = tp->ops->get(tp, t->tcm_handle);
   
- -      if (fh == 0) {
+ +      if (!fh) {
                 if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
                         tcf_chain_tp_remove(chain, &chain_info, tp);
                         tfilter_notify(net, skb, n, tp, fh,
@@@ -683,10 -595,11 +683,10 @@@
                         }
                         break;
                 case RTM_DELTFILTER:
- -                      err = tp->ops->delete(tp, fh, &last);
+ +                      err = tfilter_del_notify(net, skb, n, tp, fh, false,
+ +                                               &last);
                         if (err)
                                 goto errout;
- -                      tfilter_notify(net, skb, n, tp, t->tcm_handle,
- -                                     RTM_DELTFILTER, false);
                         if (last) {
                                 tcf_chain_tp_remove(chain, &chain_info, tp);
                                 tcf_proto_destroy(tp);
@@@ -724,13 -637,75 +724,13 @@@ errout
         return err;
   }
   
- -static int tcf_fill_node(struct net *net, struct sk_buff *skb,
- -                       struct tcf_proto *tp, unsigned long fh, u32 portid,
- -                       u32 seq, u16 flags, int event)
- -{
- -      struct tcmsg *tcm;
- -      struct nlmsghdr  *nlh;
- -      unsigned char *b = skb_tail_pointer(skb);
- -
- -      nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
- -      if (!nlh)
- -              goto out_nlmsg_trim;
- -      tcm = nlmsg_data(nlh);
- -      tcm->tcm_family = AF_UNSPEC;
- -      tcm->tcm__pad1 = 0;
- -      tcm->tcm__pad2 = 0;
- -      tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
- -      tcm->tcm_parent = tp->classid;
- -      tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
- -      if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
- -              goto nla_put_failure;
- -      if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
- -              goto nla_put_failure;
- -      tcm->tcm_handle = fh;
- -      if (RTM_DELTFILTER != event) {
- -              tcm->tcm_handle = 0;
- -              if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
- -                      goto nla_put_failure;
- -      }
- -      nlh->nlmsg_len = skb_tail_pointer(skb) - b;
- -      return skb->len;
- -
- -out_nlmsg_trim:
- -nla_put_failure:
- -      nlmsg_trim(skb, b);
- -      return -1;
- -}
- -
- -static int tfilter_notify(struct net *net, struct sk_buff *oskb,
- -                        struct nlmsghdr *n, struct tcf_proto *tp,
- -                        unsigned long fh, int event, bool unicast)
- -{
- -      struct sk_buff *skb;
- -      u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- -
- -      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- -      if (!skb)
- -              return -ENOBUFS;
- -
- -      if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
- -                        n->nlmsg_flags, event) <= 0) {
- -              kfree_skb(skb);
- -              return -EINVAL;
- -      }
- -
- -      if (unicast)
- -              return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
- -
- -      return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- -                            n->nlmsg_flags & NLM_F_ECHO);
- -}
- -
   struct tcf_dump_args {
         struct tcf_walker w;
         struct sk_buff *skb;
         struct netlink_callback *cb;
   };
   
- -static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
- -                       struct tcf_walker *arg)
+ +static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
   {
         struct tcf_dump_args *a = (void *)arg;
         struct net *net = sock_net(a->skb->sk);
@@@ -908,12 -883,18 +908,12 @@@ int tcf_exts_validate(struct net *net, 
   }
   EXPORT_SYMBOL(tcf_exts_validate);
   
- -void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
- -                   struct tcf_exts *src)
+ +void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
   {
   #ifdef CONFIG_NET_CLS_ACT
         struct tcf_exts old = *dst;
   
- -      tcf_tree_lock(tp);
- -      dst->nr_actions = src->nr_actions;
- -      dst->actions = src->actions;
- -      dst->type = src->type;
- -      tcf_tree_unlock(tp);
- -
+ +      *dst = *src;
         tcf_exts_destroy(&old);
   #endif
   }
@@@ -934,7 -915,7 +934,7 @@@ int tcf_exts_dump(struct sk_buff *skb, 
   #ifdef CONFIG_NET_CLS_ACT
         struct nlattr *nest;
   
- -      if (exts->action && exts->nr_actions) {
+ +      if (exts->action && tcf_exts_has_actions(exts)) {
                 /*
                  * again for backward compatible mode - we want
                  * to work with both old and new modes of entering
@@@ -991,7 -972,7 +991,7 @@@ int tcf_exts_get_dev(struct net_device 
         const struct tc_action *a;
         LIST_HEAD(actions);
   
- -      if (tc_no_actions(exts))
+ +      if (!tcf_exts_has_actions(exts))
                 return -EINVAL;
   
         tcf_exts_to_list(exts, &actions);
@@@ -1010,10 -991,10 +1010,10 @@@ EXPORT_SYMBOL(tcf_exts_get_dev)
   
   static int __init tc_filter_init(void)
   {
- -      rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
- -      rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
+ +      rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
+ +      rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
         rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
- -                    tc_dump_tfilter, NULL);
+ +                    tc_dump_tfilter, 0);
   
         return 0;
   }
diff --combined net/sctp/ipv6.c

index a2a1c1d08d512d3515a5bc2ef2c9406cda385489,a4b6ffb6149541b78e39aceae859224d13487106..51c4887695909d171285b98ce1be779a3adedbab
--- 1/net/sctp/ipv6.c
--- 2/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@@ -243,8 -243,8 +243,8 @@@ static void sctp_v6_get_dst(struct sctp
         union sctp_addr *daddr = &t->ipaddr;
         union sctp_addr dst_saddr;
         struct in6_addr *final_p, final;
+ +      enum sctp_scope scope;
         __u8 matchlen = 0;
- -      sctp_scope_t scope;
   
         memset(fl6, 0, sizeof(struct flowi6));
         fl6->daddr = daddr->v6.sin6_addr;
@@@ -497,7 -497,7 +497,7 @@@ static void sctp_v6_from_addr_param(uni
   static int sctp_v6_to_addr_param(const union sctp_addr *addr,
                                  union sctp_addr_param *param)
   {
- -      int length = sizeof(sctp_ipv6addr_param_t);
+ +      int length = sizeof(struct sctp_ipv6addr_param);
   
         param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS;
         param->v6.param_hdr.length = htons(length);
@@@ -512,7 -512,9 +512,9 @@@ static void sctp_v6_to_addr(union sctp_
   {
         addr->sa.sa_family = AF_INET6;
         addr->v6.sin6_port = port;
+       addr->v6.sin6_flowinfo = 0;
         addr->v6.sin6_addr = *saddr;
+       addr->v6.sin6_scope_id = 0;
   }
   
   /* Compare addresses exactly.
@@@ -624,10 -626,10 +626,10 @@@ static int sctp_v6_addr_valid(union sct
   }
   
   /* What is the scope of 'addr'?  */
- -static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
+ +static enum sctp_scope sctp_v6_scope(union sctp_addr *addr)
   {
+ +      enum sctp_scope retval;
         int v6scope;
- -      sctp_scope_t retval;
   
         /* The IPv6 scope is really a set of bit fields.
          * See IFA_* in <net/if_inet6.h>.  Map to a generic SCTP scope.
diff --combined net/unix/af_unix.c

index 5c53f22d62e8d6ef79eca921ae5a361443d73385,be8982b4f8c00be8bb95748c1c33a76e13079dff..7f46bab4ce5c84aa285d8141b4e0f822e8dab01f
--- 1/net/unix/af_unix.c
--- 2/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@@ -1528,13 -1528,26 +1528,13 @@@ static inline bool too_many_unix_fds(st
         return false;
   }
   
- -#define MAX_RECURSION_LEVEL 4
- -
   static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
   {
         int i;
- -      unsigned char max_level = 0;
   
         if (too_many_unix_fds(current))
                 return -ETOOMANYREFS;
   
- -      for (i = scm->fp->count - 1; i >= 0; i--) {
- -              struct sock *sk = unix_get_socket(scm->fp->fp[i]);
- -
- -              if (sk)
- -                      max_level = max(max_level,
- -                                      unix_sk(sk)->recursion_level);
- -      }
- -      if (unlikely(max_level > MAX_RECURSION_LEVEL))
- -              return -ETOOMANYREFS;
- -
         /*
          * Need to duplicate file references for the sake of garbage
          * collection.  Otherwise a socket in the fps might become a
@@@ -1546,7 -1559,7 +1546,7 @@@
   
         for (i = scm->fp->count - 1; i >= 0; i--)
                 unix_inflight(scm->fp->user, scm->fp->fp[i]);
- -      return max_level;
+ +      return 0;
   }
   
   static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@@ -1636,6 -1649,7 +1636,6 @@@ static int unix_dgram_sendmsg(struct so
         struct sk_buff *skb;
         long timeo;
         struct scm_cookie scm;
- -      int max_level;
         int data_len = 0;
         int sk_locked;
   
@@@ -1687,6 -1701,7 +1687,6 @@@
         err = unix_scm_to_skb(&scm, skb, true);
         if (err < 0)
                 goto out_free;
- -      max_level = err + 1;
   
         skb_put(skb, len - data_len);
         skb->data_len = data_len;
@@@ -1804,6 -1819,8 +1804,6 @@@ restart_locked
                 __net_timestamp(skb);
         maybe_add_creds(skb, sock, other);
         skb_queue_tail(&other->sk_receive_queue, skb);
- -      if (max_level > unix_sk(other)->recursion_level)
- -              unix_sk(other)->recursion_level = max_level;
         unix_state_unlock(other);
         other->sk_data_ready(other);
         sock_put(other);
@@@ -1838,6 -1855,7 +1838,6 @@@ static int unix_stream_sendmsg(struct s
         int sent = 0;
         struct scm_cookie scm;
         bool fds_sent = false;
- -      int max_level;
         int data_len;
   
         wait_for_unix_gc();
@@@ -1887,6 -1905,7 +1887,6 @@@
                         kfree_skb(skb);
                         goto out_err;
                 }
- -              max_level = err + 1;
                 fds_sent = true;
   
                 skb_put(skb, size - data_len);
@@@ -1906,6 -1925,8 +1906,6 @@@
   
                 maybe_add_creds(skb, sock, other);
                 skb_queue_tail(&other->sk_receive_queue, skb);
- -              if (max_level > unix_sk(other)->recursion_level)
- -                      unix_sk(other)->recursion_level = max_level;
                 unix_state_unlock(other);
                 other->sk_data_ready(other);
                 sent += size;
@@@ -2283,10 -2304,7 +2283,7 @@@ static int unix_stream_read_generic(str
          */
         mutex_lock(&u->iolock);
   
-       if (flags & MSG_PEEK)
-               skip = sk_peek_offset(sk, flags);
-       else
-               skip = 0;
+       skip = max(sk_peek_offset(sk, flags), 0);
   
         do {
                 int chunk;
@@@ -2303,6 -2321,7 +2300,6 @@@ redo
                 last_len = last ? last->len : 0;
   again:
                 if (skb == NULL) {
- -                      unix_sk(sk)->recursion_level = 0;
                         if (copied >= target)
                                 goto unlock;
   
diff --combined tools/lib/bpf/libbpf.c

index 1cc3ea0ffdc3b38bf56b8c531374859501febd83,8c67a90dbd8229062fe1c0a9ae317fd5f3c1689b..35f6dfcdc56518528b964f04d7bc7033e1a2b36b
--- 1/tools/lib/bpf/libbpf.c
--- 2/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@@ -879,7 -879,8 +879,8 @@@ bpf_object__create_maps(struct bpf_obje
                         size_t j;
                         int err = *pfd;
   
-                       pr_warning("failed to create map: %s\n",
+                       pr_warning("failed to create map (name: '%s'): %s\n",
+                                  obj->maps[i].name,
                                    strerror(errno));
                         for (j = 0; j < i; j++)
                                 zclose(obj->maps[j].fd);
@@@ -1744,32 -1745,3 +1745,32 @@@ long libbpf_get_error(const void *ptr
                 return PTR_ERR(ptr);
         return 0;
   }
+ +
+ +int bpf_prog_load(const char *file, enum bpf_prog_type type,
+ +                struct bpf_object **pobj, int *prog_fd)
+ +{
+ +      struct bpf_program *prog;
+ +      struct bpf_object *obj;
+ +      int err;
+ +
+ +      obj = bpf_object__open(file);
+ +      if (IS_ERR(obj))
+ +              return -ENOENT;
+ +
+ +      prog = bpf_program__next(NULL, obj);
+ +      if (!prog) {
+ +              bpf_object__close(obj);
+ +              return -ENOENT;
+ +      }
+ +
+ +      bpf_program__set_type(prog, type);
+ +      err = bpf_object__load(obj);
+ +      if (err) {
+ +              bpf_object__close(obj);
+ +              return -EINVAL;
+ +      }
+ +
+ +      *pobj = obj;
+ +      *prog_fd = bpf_program__fd(prog);
+ +      return 0;
+ +}
author	David S. Miller <davem@davemloft.net>
	Tue, 22 Aug 2017 00:06:42 +0000 (17:06 -0700)
committer	David S. Miller <davem@davemloft.net>
	Tue, 22 Aug 2017 00:06:42 +0000 (17:06 -0700)
		1	2
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/mellanox/mlx4/main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/netronome/nfp/nfp_net_common.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/tun.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/net/ip.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/net/sch_generic.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/net/sock.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/events/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/core/datagram.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/igmp.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/route.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/tcp_input.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/udp.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv6/ip6_fib.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv6/udp.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/openvswitch/datapath.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/sched/cls_api.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/sctp/ipv6.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/unix/af_unix.c	patch \|	diff1 \|	diff2 \|	blob \| history
tools/lib/bpf/libbpf.c	patch \|	diff1 \|	diff2 \|	blob \| history