Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 28 Nov 2018 20:53:48 +0000 (12:53 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 28 Nov 2018 20:53:48 +0000 (12:53 -0800)
Pull networking fixes from David Miller:

 1) ARM64 JIT fixes for subprog handling from Daniel Borkmann.

 2) Various sparc64 JIT bug fixes (fused branch convergence, frame
    pointer usage detection logic, PSEUDO call argument handling).

 3) Fix to use BH locking in nf_conncount, from Taehee Yoo.

 4) Fix race of TX skb freeing in ipheth driver, from Bernd Eckstein.

 5) Handle return value of TX NAPI completion properly in lan743x
    driver, from Bryan Whitehead.

 6) MAC filter deletion in i40e driver clears wrong state bit, from
    Lihong Yang.

 7) Fix use after free in rionet driver, from Pan Bian.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (53 commits)
  s390/qeth: fix length check in SNMP processing
  net: hisilicon: remove unexpected free_netdev
  rapidio/rionet: do not free skb before reading its length
  i40e: fix kerneldoc for xsk methods
  ixgbe: recognize 1000BaseLX SFP modules as 1Gbps
  i40e: Fix deletion of MAC filters
  igb: fix uninitialized variables
  netfilter: nf_tables: deactivate expressions in rule replacement routine
  lan743x: Enable driver to work with LAN7431
  tipc: fix lockdep warning during node delete
  lan743x: fix return value for lan743x_tx_napi_poll
  net: via: via-velocity: fix spelling mistake "alignement" -> "alignment"
  qed: fix spelling mistake "attnetion" -> "attention"
  net: thunderx: fix NULL pointer dereference in nic_remove
  sctp: increase sk_wmem_alloc when head->truesize is increased
  firestream: fix spelling mistake: "Inititing" -> "Initializing"
  net: phy: add workaround for issue where PHY driver doesn't bind to the device
  usbnet: ipheth: fix potential recvmsg bug and recvmsg bug 2
  sparc: Adjust bpf JIT prologue for PSEUDO calls.
  bpf, doc: add entries of who looks over which jits
  ...

68 files changed:
MAINTAINERS
arch/arm64/net/bpf_jit_comp.c
arch/powerpc/net/bpf_jit_comp64.c
arch/sparc/net/bpf_jit_comp_64.c
drivers/atm/firestream.c
drivers/net/ethernet/cavium/thunder/nic_main.c
drivers/net/ethernet/hisilicon/hip04_eth.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_xsk.c
drivers/net/ethernet/intel/igb/e1000_i210.c
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
drivers/net/ethernet/microchip/lan743x_main.c
drivers/net/ethernet/microchip/lan743x_main.h
drivers/net/ethernet/qlogic/qed/qed_debug.c
drivers/net/ethernet/via/via-velocity.c
drivers/net/phy/phy_device.c
drivers/net/rionet.c
drivers/net/usb/ipheth.c
drivers/s390/net/qeth_core_main.c
include/linux/filter.h
include/linux/netfilter/nf_conntrack_proto_gre.h
include/net/netfilter/ipv4/nf_nat_masquerade.h
include/net/netfilter/ipv6/nf_nat_masquerade.h
kernel/bpf/core.c
kernel/bpf/local_storage.c
kernel/bpf/queue_stack_maps.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
net/core/filter.c
net/ipv4/ip_output.c
net/ipv4/netfilter/ipt_MASQUERADE.c
net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
net/ipv4/netfilter/nft_masq_ipv4.c
net/ipv4/tcp_input.c
net/ipv4/tcp_timer.c
net/ipv6/ip6_output.c
net/ipv6/netfilter.c
net/ipv6/netfilter/ip6t_MASQUERADE.c
net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
net/ipv6/netfilter/nft_masq_ipv6.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/nf_conncount.c
net/netfilter/nf_conntrack_proto_gre.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nft_compat.c
net/netfilter/nft_flow_offload.c
net/netfilter/xt_RATEEST.c
net/netfilter/xt_hashlimit.c
net/sctp/output.c
net/tipc/node.c
tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
tools/bpf/bpftool/Documentation/bpftool-map.rst
tools/bpf/bpftool/Documentation/bpftool-net.rst
tools/bpf/bpftool/Documentation/bpftool-perf.rst
tools/bpf/bpftool/Documentation/bpftool-prog.rst
tools/bpf/bpftool/Documentation/bpftool.rst
tools/bpf/bpftool/common.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/prog.c
tools/include/uapi/linux/pkt_cls.h [new file with mode: 0644]
tools/include/uapi/linux/tc_act/tc_bpf.h [new file with mode: 0644]
tools/testing/selftests/Makefile
tools/testing/selftests/bpf/test_netcnt.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/netfilter/Makefile [new file with mode: 0644]
tools/testing/selftests/netfilter/config [new file with mode: 0644]
tools/testing/selftests/netfilter/nft_trans_stress.sh [new file with mode: 0755]

index 380e43f585d3416bbc1e24f0240a9852eafce597..da57abebaab35a95a4f94d5db63c74b550c08c9c 100644 (file)
@@ -2801,7 +2801,7 @@ T:        git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
 Q:     https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
 S:     Supported
-F:     arch/x86/net/bpf_jit*
+F:     arch/*/net/*
 F:     Documentation/networking/filter.txt
 F:     Documentation/bpf/
 F:     include/linux/bpf*
@@ -2821,6 +2821,67 @@ F:       tools/bpf/
 F:     tools/lib/bpf/
 F:     tools/testing/selftests/bpf/
 
+BPF JIT for ARM
+M:     Shubham Bansal <illusionist.neo@gmail.com>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     arch/arm/net/
+
+BPF JIT for ARM64
+M:     Daniel Borkmann <daniel@iogearbox.net>
+M:     Alexei Starovoitov <ast@kernel.org>
+M:     Zi Shen Lim <zlim.lnx@gmail.com>
+L:     netdev@vger.kernel.org
+S:     Supported
+F:     arch/arm64/net/
+
+BPF JIT for MIPS (32-BIT AND 64-BIT)
+M:     Paul Burton <paul.burton@mips.com>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     arch/mips/net/
+
+BPF JIT for NFP NICs
+M:     Jakub Kicinski <jakub.kicinski@netronome.com>
+L:     netdev@vger.kernel.org
+S:     Supported
+F:     drivers/net/ethernet/netronome/nfp/bpf/
+
+BPF JIT for POWERPC (32-BIT AND 64-BIT)
+M:     Naveen N. Rao <naveen.n.rao@linux.ibm.com>
+M:     Sandipan Das <sandipan@linux.ibm.com>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     arch/powerpc/net/
+
+BPF JIT for S390
+M:     Martin Schwidefsky <schwidefsky@de.ibm.com>
+M:     Heiko Carstens <heiko.carstens@de.ibm.com>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     arch/s390/net/
+X:     arch/s390/net/pnet.c
+
+BPF JIT for SPARC (32-BIT AND 64-BIT)
+M:     David S. Miller <davem@davemloft.net>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     arch/sparc/net/
+
+BPF JIT for X86 32-BIT
+M:     Wang YanQing <udknight@gmail.com>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     arch/x86/net/bpf_jit_comp32.c
+
+BPF JIT for X86 64-BIT
+M:     Alexei Starovoitov <ast@kernel.org>
+M:     Daniel Borkmann <daniel@iogearbox.net>
+L:     netdev@vger.kernel.org
+S:     Supported
+F:     arch/x86/net/
+X:     arch/x86/net/bpf_jit_comp32.c
+
 BROADCOM B44 10/100 ETHERNET DRIVER
 M:     Michael Chan <michael.chan@broadcom.com>
 L:     netdev@vger.kernel.org
index a6fdaea07c6339cf2754d582765747ee5d8b2ff5..89198017e8e681268504235331471b38b7e945b9 100644 (file)
@@ -351,7 +351,8 @@ static void build_epilogue(struct jit_ctx *ctx)
  * >0 - successfully JITed a 16-byte eBPF instruction.
  * <0 - failed to JIT.
  */
-static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+                     bool extra_pass)
 {
        const u8 code = insn->code;
        const u8 dst = bpf2a64[insn->dst_reg];
@@ -625,12 +626,19 @@ emit_cond_jmp:
        case BPF_JMP | BPF_CALL:
        {
                const u8 r0 = bpf2a64[BPF_REG_0];
-               const u64 func = (u64)__bpf_call_base + imm;
+               bool func_addr_fixed;
+               u64 func_addr;
+               int ret;
 
-               if (ctx->prog->is_func)
-                       emit_addr_mov_i64(tmp, func, ctx);
+               ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
+                                           &func_addr, &func_addr_fixed);
+               if (ret < 0)
+                       return ret;
+               if (func_addr_fixed)
+                       /* We can use optimized emission here. */
+                       emit_a64_mov_i64(tmp, func_addr, ctx);
                else
-                       emit_a64_mov_i64(tmp, func, ctx);
+                       emit_addr_mov_i64(tmp, func_addr, ctx);
                emit(A64_BLR(tmp), ctx);
                emit(A64_MOV(1, r0, A64_R(0)), ctx);
                break;
@@ -753,7 +761,7 @@ emit_cond_jmp:
        return 0;
 }
 
-static int build_body(struct jit_ctx *ctx)
+static int build_body(struct jit_ctx *ctx, bool extra_pass)
 {
        const struct bpf_prog *prog = ctx->prog;
        int i;
@@ -762,7 +770,7 @@ static int build_body(struct jit_ctx *ctx)
                const struct bpf_insn *insn = &prog->insnsi[i];
                int ret;
 
-               ret = build_insn(insn, ctx);
+               ret = build_insn(insn, ctx, extra_pass);
                if (ret > 0) {
                        i++;
                        if (ctx->image == NULL)
@@ -858,7 +866,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
        /* 1. Initial fake pass to compute ctx->idx. */
 
        /* Fake pass to fill in ctx->offset. */
-       if (build_body(&ctx)) {
+       if (build_body(&ctx, extra_pass)) {
                prog = orig_prog;
                goto out_off;
        }
@@ -888,7 +896,7 @@ skip_init_ctx:
 
        build_prologue(&ctx, was_classic);
 
-       if (build_body(&ctx)) {
+       if (build_body(&ctx, extra_pass)) {
                bpf_jit_binary_free(header);
                prog = orig_prog;
                goto out_off;
index 50b129785aeeead06f8d131a64d30a6ccda2576e..17482f5de3e262f53d67b4199490660362b394a0 100644 (file)
@@ -166,7 +166,33 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
        PPC_BLR();
 }
 
-static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
+static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
+                                      u64 func)
+{
+#ifdef PPC64_ELF_ABI_v1
+       /* func points to the function descriptor */
+       PPC_LI64(b2p[TMP_REG_2], func);
+       /* Load actual entry point from function descriptor */
+       PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
+       /* ... and move it to LR */
+       PPC_MTLR(b2p[TMP_REG_1]);
+       /*
+        * Load TOC from function descriptor at offset 8.
+        * We can clobber r2 since we get called through a
+        * function pointer (so caller will save/restore r2)
+        * and since we don't use a TOC ourself.
+        */
+       PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
+#else
+       /* We can clobber r12 */
+       PPC_FUNC_ADDR(12, func);
+       PPC_MTLR(12);
+#endif
+       PPC_BLRL();
+}
+
+static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
+                                      u64 func)
 {
        unsigned int i, ctx_idx = ctx->idx;
 
@@ -273,7 +299,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 {
        const struct bpf_insn *insn = fp->insnsi;
        int flen = fp->len;
-       int i;
+       int i, ret;
 
        /* Start of epilogue code - will only be valid 2nd pass onwards */
        u32 exit_addr = addrs[flen];
@@ -284,8 +310,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
                u32 src_reg = b2p[insn[i].src_reg];
                s16 off = insn[i].off;
                s32 imm = insn[i].imm;
+               bool func_addr_fixed;
+               u64 func_addr;
                u64 imm64;
-               u8 *func;
                u32 true_cond;
                u32 tmp_idx;
 
@@ -711,23 +738,15 @@ emit_clear:
                case BPF_JMP | BPF_CALL:
                        ctx->seen |= SEEN_FUNC;
 
-                       /* bpf function call */
-                       if (insn[i].src_reg == BPF_PSEUDO_CALL)
-                               if (!extra_pass)
-                                       func = NULL;
-                               else if (fp->aux->func && off < fp->aux->func_cnt)
-                                       /* use the subprog id from the off
-                                        * field to lookup the callee address
-                                        */
-                                       func = (u8 *) fp->aux->func[off]->bpf_func;
-                               else
-                                       return -EINVAL;
-                       /* kernel helper call */
-                       else
-                               func = (u8 *) __bpf_call_base + imm;
-
-                       bpf_jit_emit_func_call(image, ctx, (u64)func);
+                       ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+                                                   &func_addr, &func_addr_fixed);
+                       if (ret < 0)
+                               return ret;
 
+                       if (func_addr_fixed)
+                               bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
+                       else
+                               bpf_jit_emit_func_call_rel(image, ctx, func_addr);
                        /* move return value from r3 to BPF_REG_0 */
                        PPC_MR(b2p[BPF_REG_0], 3);
                        break;
index 222785af550b46736676808b6e00d8d8cef9a286..5fda4f7bf15d176fbb913435ea0a25c510c8df5a 100644 (file)
@@ -791,7 +791,7 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
 }
 
 /* Just skip the save instruction and the ctx register move.  */
-#define BPF_TAILCALL_PROLOGUE_SKIP     16
+#define BPF_TAILCALL_PROLOGUE_SKIP     32
 #define BPF_TAILCALL_CNT_SP_OFF                (STACK_BIAS + 128)
 
 static void build_prologue(struct jit_ctx *ctx)
@@ -824,9 +824,15 @@ static void build_prologue(struct jit_ctx *ctx)
                const u8 vfp = bpf2sparc[BPF_REG_FP];
 
                emit(ADD | IMMED | RS1(FP) | S13(STACK_BIAS) | RD(vfp), ctx);
+       } else {
+               emit_nop(ctx);
        }
 
        emit_reg_move(I0, O0, ctx);
+       emit_reg_move(I1, O1, ctx);
+       emit_reg_move(I2, O2, ctx);
+       emit_reg_move(I3, O3, ctx);
+       emit_reg_move(I4, O4, ctx);
        /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
 }
 
@@ -1270,6 +1276,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                const u8 tmp2 = bpf2sparc[TMP_REG_2];
                u32 opcode = 0, rs2;
 
+               if (insn->dst_reg == BPF_REG_FP)
+                       ctx->saw_frame_pointer = true;
+
                ctx->tmp_2_used = true;
                emit_loadimm(imm, tmp2, ctx);
 
@@ -1308,6 +1317,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                const u8 tmp = bpf2sparc[TMP_REG_1];
                u32 opcode = 0, rs2;
 
+               if (insn->dst_reg == BPF_REG_FP)
+                       ctx->saw_frame_pointer = true;
+
                switch (BPF_SIZE(code)) {
                case BPF_W:
                        opcode = ST32;
@@ -1340,6 +1352,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                const u8 tmp2 = bpf2sparc[TMP_REG_2];
                const u8 tmp3 = bpf2sparc[TMP_REG_3];
 
+               if (insn->dst_reg == BPF_REG_FP)
+                       ctx->saw_frame_pointer = true;
+
                ctx->tmp_1_used = true;
                ctx->tmp_2_used = true;
                ctx->tmp_3_used = true;
@@ -1360,6 +1375,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                const u8 tmp2 = bpf2sparc[TMP_REG_2];
                const u8 tmp3 = bpf2sparc[TMP_REG_3];
 
+               if (insn->dst_reg == BPF_REG_FP)
+                       ctx->saw_frame_pointer = true;
+
                ctx->tmp_1_used = true;
                ctx->tmp_2_used = true;
                ctx->tmp_3_used = true;
@@ -1425,12 +1443,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
        struct bpf_prog *tmp, *orig_prog = prog;
        struct sparc64_jit_data *jit_data;
        struct bpf_binary_header *header;
+       u32 prev_image_size, image_size;
        bool tmp_blinded = false;
        bool extra_pass = false;
        struct jit_ctx ctx;
-       u32 image_size;
        u8 *image_ptr;
-       int pass;
+       int pass, i;
 
        if (!prog->jit_requested)
                return orig_prog;
@@ -1461,61 +1479,82 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                header = jit_data->header;
                extra_pass = true;
                image_size = sizeof(u32) * ctx.idx;
+               prev_image_size = image_size;
+               pass = 1;
                goto skip_init_ctx;
        }
 
        memset(&ctx, 0, sizeof(ctx));
        ctx.prog = prog;
 
-       ctx.offset = kcalloc(prog->len, sizeof(unsigned int), GFP_KERNEL);
+       ctx.offset = kmalloc_array(prog->len, sizeof(unsigned int), GFP_KERNEL);
        if (ctx.offset == NULL) {
                prog = orig_prog;
                goto out_off;
        }
 
-       /* Fake pass to detect features used, and get an accurate assessment
-        * of what the final image size will be.
+       /* Longest sequence emitted is for bswap32, 12 instructions.  Pre-cook
+        * the offset array so that we converge faster.
         */
-       if (build_body(&ctx)) {
-               prog = orig_prog;
-               goto out_off;
-       }
-       build_prologue(&ctx);
-       build_epilogue(&ctx);
-
-       /* Now we know the actual image size. */
-       image_size = sizeof(u32) * ctx.idx;
-       header = bpf_jit_binary_alloc(image_size, &image_ptr,
-                                     sizeof(u32), jit_fill_hole);
-       if (header == NULL) {
-               prog = orig_prog;
-               goto out_off;
-       }
+       for (i = 0; i < prog->len; i++)
+               ctx.offset[i] = i * (12 * 4);
 
-       ctx.image = (u32 *)image_ptr;
-skip_init_ctx:
-       for (pass = 1; pass < 3; pass++) {
+       prev_image_size = ~0U;
+       for (pass = 1; pass < 40; pass++) {
                ctx.idx = 0;
 
                build_prologue(&ctx);
-
                if (build_body(&ctx)) {
-                       bpf_jit_binary_free(header);
                        prog = orig_prog;
                        goto out_off;
                }
-
                build_epilogue(&ctx);
 
                if (bpf_jit_enable > 1)
-                       pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass,
-                               image_size - (ctx.idx * 4),
+                       pr_info("Pass %d: size = %u, seen = [%c%c%c%c%c%c]\n", pass,
+                               ctx.idx * 4,
                                ctx.tmp_1_used ? '1' : ' ',
                                ctx.tmp_2_used ? '2' : ' ',
                                ctx.tmp_3_used ? '3' : ' ',
                                ctx.saw_frame_pointer ? 'F' : ' ',
                                ctx.saw_call ? 'C' : ' ',
                                ctx.saw_tail_call ? 'T' : ' ');
+
+               if (ctx.idx * 4 == prev_image_size)
+                       break;
+               prev_image_size = ctx.idx * 4;
+               cond_resched();
+       }
+
+       /* Now we know the actual image size. */
+       image_size = sizeof(u32) * ctx.idx;
+       header = bpf_jit_binary_alloc(image_size, &image_ptr,
+                                     sizeof(u32), jit_fill_hole);
+       if (header == NULL) {
+               prog = orig_prog;
+               goto out_off;
+       }
+
+       ctx.image = (u32 *)image_ptr;
+skip_init_ctx:
+       ctx.idx = 0;
+
+       build_prologue(&ctx);
+
+       if (build_body(&ctx)) {
+               bpf_jit_binary_free(header);
+               prog = orig_prog;
+               goto out_off;
+       }
+
+       build_epilogue(&ctx);
+
+       if (ctx.idx * 4 != prev_image_size) {
+               pr_err("bpf_jit: Failed to converge, prev_size=%u size=%d\n",
+                      prev_image_size, ctx.idx * 4);
+               bpf_jit_binary_free(header);
+               prog = orig_prog;
+               goto out_off;
        }
 
        if (bpf_jit_enable > 1)
index 4e46dc9e41ad01142ffdcfe1c38921df9db0ebda..11e1663bdc4dee0e2cfd7cd9ba61783d00277bbf 100644 (file)
@@ -1410,7 +1410,7 @@ static int init_q(struct fs_dev *dev, struct queue *txq, int queue,
 
        func_enter ();
 
-       fs_dprintk (FS_DEBUG_INIT, "Inititing queue at %x: %d entries:\n", 
+       fs_dprintk (FS_DEBUG_INIT, "Initializing queue at %x: %d entries:\n",
                    queue, nentries);
 
        p = aligned_kmalloc (sz, GFP_KERNEL, 0x10);
@@ -1443,7 +1443,7 @@ static int init_fp(struct fs_dev *dev, struct freepool *fp, int queue,
 {
        func_enter ();
 
-       fs_dprintk (FS_DEBUG_INIT, "Inititing free pool at %x:\n", queue);
+       fs_dprintk (FS_DEBUG_INIT, "Initializing free pool at %x:\n", queue);
 
        write_fs (dev, FP_CNF(queue), (bufsize * RBFP_RBS) | RBFP_RBSVAL | RBFP_CME);
        write_fs (dev, FP_SA(queue),  0);
index 55af04fa03a77e850196e82e930e3f85af7c6aa7..6c8dcb65ff031d230303604c2071797027bf11a4 100644 (file)
@@ -1441,6 +1441,9 @@ static void nic_remove(struct pci_dev *pdev)
 {
        struct nicpf *nic = pci_get_drvdata(pdev);
 
+       if (!nic)
+               return;
+
        if (nic->flags & NIC_SRIOV_ENABLED)
                pci_disable_sriov(pdev);
 
index be268dcde8fa2a5db3fd392237d583a4c5cc0d5e..f9a4e76c5a8b73799c61cb7d3a260cc50f9287ee 100644 (file)
@@ -915,10 +915,8 @@ static int hip04_mac_probe(struct platform_device *pdev)
        }
 
        ret = register_netdev(ndev);
-       if (ret) {
-               free_netdev(ndev);
+       if (ret)
                goto alloc_fail;
-       }
 
        return 0;
 
index 21c2688d63082ec25bb6f72e921d39ac3beedee8..a3f45335437c3cecde089e44c3a13a3932001960 100644 (file)
@@ -1413,7 +1413,7 @@ void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
        }
 
        vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
-       set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->state);
+       set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
 }
 
 /**
index add1e457886df5e40a11388738b7580bc3a2508c..433c8e688c78d5623e65fe5edefeae4cc22d67a8 100644 (file)
@@ -33,7 +33,7 @@ static int i40e_alloc_xsk_umems(struct i40e_vsi *vsi)
 }
 
 /**
- * i40e_add_xsk_umem - Store an UMEM for a certain ring/qid
+ * i40e_add_xsk_umem - Store a UMEM for a certain ring/qid
  * @vsi: Current VSI
  * @umem: UMEM to store
  * @qid: Ring/qid to associate with the UMEM
@@ -56,7 +56,7 @@ static int i40e_add_xsk_umem(struct i40e_vsi *vsi, struct xdp_umem *umem,
 }
 
 /**
- * i40e_remove_xsk_umem - Remove an UMEM for a certain ring/qid
+ * i40e_remove_xsk_umem - Remove a UMEM for a certain ring/qid
  * @vsi: Current VSI
  * @qid: Ring/qid associated with the UMEM
  **/
@@ -130,7 +130,7 @@ static void i40e_xsk_umem_dma_unmap(struct i40e_vsi *vsi, struct xdp_umem *umem)
 }
 
 /**
- * i40e_xsk_umem_enable - Enable/associate an UMEM to a certain ring/qid
+ * i40e_xsk_umem_enable - Enable/associate a UMEM to a certain ring/qid
  * @vsi: Current VSI
  * @umem: UMEM
  * @qid: Rx ring to associate UMEM to
@@ -189,7 +189,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
 }
 
 /**
- * i40e_xsk_umem_disable - Diassociate an UMEM from a certain ring/qid
+ * i40e_xsk_umem_disable - Disassociate a UMEM from a certain ring/qid
  * @vsi: Current VSI
  * @qid: Rx ring to associate UMEM to
  *
@@ -255,12 +255,12 @@ int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
 }
 
 /**
- * i40e_xsk_umem_query - Queries a certain ring/qid for its UMEM
+ * i40e_xsk_umem_setup - Enable/disassociate a UMEM to/from a ring/qid
  * @vsi: Current VSI
  * @umem: UMEM to enable/associate to a ring, or NULL to disable
  * @qid: Rx ring to (dis)associate UMEM (from)to
  *
- * This function enables or disables an UMEM to a certain ring.
+ * This function enables or disables a UMEM to a certain ring.
  *
  * Returns 0 on success, <0 on failure
  **/
@@ -276,7 +276,7 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
  * @rx_ring: Rx ring
  * @xdp: xdp_buff used as input to the XDP program
  *
- * This function enables or disables an UMEM to a certain ring.
+ * This function enables or disables a UMEM to a certain ring.
  *
  * Returns any of I40E_XDP_{PASS, CONSUMED, TX, REDIR}
  **/
index c54ebedca6da9a3ddaeff1b43cde2993438c4714..c393cb2c0f1681f702a8b648f21ef49c3c8a565d 100644 (file)
@@ -842,6 +842,7 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw)
                nvm_word = E1000_INVM_DEFAULT_AL;
        tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL;
        igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, E1000_PHY_PLL_FREQ_PAGE);
+       phy_word = E1000_PHY_PLL_UNCONF;
        for (i = 0; i < E1000_MAX_PLL_TRIES; i++) {
                /* check current state directly from internal PHY */
                igb_read_phy_reg_82580(hw, E1000_PHY_PLL_FREQ_REG, &phy_word);
index 10dbaf4f6e808d7e2e19e6a560f11213eb1b2b87..9c42f741ed5efde3ad667bbc75d4a6b5ea5dec15 100644 (file)
@@ -2262,7 +2262,9 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw,
                *autoneg = false;
 
                if (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
-                   hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) {
+                   hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 ||
+                   hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+                   hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1) {
                        *speed = IXGBE_LINK_SPEED_1GB_FULL;
                        return 0;
                }
index 867cddba840feb07aa93c1b58908023572951e7a..e8ca98c070f68443c6460ecb10d3eb4b3ee9a2f2 100644 (file)
@@ -1672,7 +1672,7 @@ static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight)
                netif_wake_queue(adapter->netdev);
        }
 
-       if (!napi_complete_done(napi, weight))
+       if (!napi_complete(napi))
                goto done;
 
        /* enable isr */
@@ -1681,7 +1681,7 @@ static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight)
        lan743x_csr_read(adapter, INT_STS);
 
 done:
-       return weight;
+       return 0;
 }
 
 static void lan743x_tx_ring_cleanup(struct lan743x_tx *tx)
@@ -1870,9 +1870,9 @@ static int lan743x_tx_open(struct lan743x_tx *tx)
        tx->vector_flags = lan743x_intr_get_vector_flags(adapter,
                                                         INT_BIT_DMA_TX_
                                                         (tx->channel_number));
-       netif_napi_add(adapter->netdev,
-                      &tx->napi, lan743x_tx_napi_poll,
-                      tx->ring_size - 1);
+       netif_tx_napi_add(adapter->netdev,
+                         &tx->napi, lan743x_tx_napi_poll,
+                         tx->ring_size - 1);
        napi_enable(&tx->napi);
 
        data = 0;
@@ -3017,6 +3017,7 @@ static const struct dev_pm_ops lan743x_pm_ops = {
 
 static const struct pci_device_id lan743x_pcidev_tbl[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7430) },
+       { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7431) },
        { 0, }
 };
 
index 0e82b6368798a2cf02cfef922b4feffa2ff779d1..2d6eea18973e8f4c8b5c733d825a5a0cc8492c39 100644 (file)
@@ -548,6 +548,7 @@ struct lan743x_adapter;
 /* SMSC acquired EFAR late 1990's, MCHP acquired SMSC 2012 */
 #define PCI_VENDOR_ID_SMSC             PCI_VENDOR_ID_EFAR
 #define PCI_DEVICE_ID_SMSC_LAN7430     (0x7430)
+#define PCI_DEVICE_ID_SMSC_LAN7431     (0x7431)
 
 #define PCI_CONFIG_LENGTH              (0x1000)
 
index 78a638ec7c0aee931c9d56aa278430891e76562f..979f1e4bc18bfbc38946198ea6667ff516edc580 100644 (file)
@@ -6071,7 +6071,7 @@ static const char * const s_igu_fifo_error_strs[] = {
        "no error",
        "length error",
        "function disabled",
-       "VF sent command to attnetion address",
+       "VF sent command to attention address",
        "host sent prod update command",
        "read of during interrupt register while in MIMD mode",
        "access to PXP BAR reserved address",
index ef9538ee53d0db7f43eae4298dd39258b4c39122..82412691ee66bf13b488db6d61072f239d27d9c3 100644 (file)
@@ -3605,7 +3605,7 @@ static const char velocity_gstrings[][ETH_GSTRING_LEN] = {
        "tx_jumbo",
        "rx_mac_control_frames",
        "tx_mac_control_frames",
-       "rx_frame_alignement_errors",
+       "rx_frame_alignment_errors",
        "rx_long_ok",
        "rx_long_err",
        "tx_sqe_errors",
index ab33d1777132e62a141e7ca76c11ef694a6f4e63..23ee3967c166713bac24906f1705a53c8eed8d70 100644 (file)
@@ -2197,6 +2197,14 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
        new_driver->mdiodrv.driver.remove = phy_remove;
        new_driver->mdiodrv.driver.owner = owner;
 
+       /* The following works around an issue where the PHY driver doesn't bind
+        * to the device, resulting in the genphy driver being used instead of
+        * the dedicated driver. The root cause of the issue isn't known yet
+        * and seems to be in the base driver core. Once this is fixed we may
+        * remove this workaround.
+        */
+       new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS;
+
        retval = driver_register(&new_driver->mdiodrv.driver);
        if (retval) {
                pr_err("%s: Error %d in registering driver\n",
index e9f101c9bae2ce1d9bde5dbe0d473119ead760e6..bfbb39f935545794c151c18de29655a574fd8bcd 100644 (file)
@@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                         * it just report sending a packet to the target
                         * (without actual packet transfer).
                         */
-                       dev_kfree_skb_any(skb);
                        ndev->stats.tx_packets++;
                        ndev->stats.tx_bytes += skb->len;
+                       dev_kfree_skb_any(skb);
                }
        }
 
index 7275761a1177ca9cda569bfc734bb6de3e1558e1..3d8a70d3ea9bd67c91f85c6cab38d1afbf5f25f3 100644 (file)
@@ -140,7 +140,6 @@ struct ipheth_device {
        struct usb_device *udev;
        struct usb_interface *intf;
        struct net_device *net;
-       struct sk_buff *tx_skb;
        struct urb *tx_urb;
        struct urb *rx_urb;
        unsigned char *tx_buf;
@@ -230,6 +229,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb)
        case -ENOENT:
        case -ECONNRESET:
        case -ESHUTDOWN:
+       case -EPROTO:
                return;
        case 0:
                break;
@@ -281,7 +281,6 @@ static void ipheth_sndbulk_callback(struct urb *urb)
                dev_err(&dev->intf->dev, "%s: urb status: %d\n",
                __func__, status);
 
-       dev_kfree_skb_irq(dev->tx_skb);
        if (status == 0)
                netif_wake_queue(dev->net);
        else
@@ -423,7 +422,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
        if (skb->len > IPHETH_BUF_SIZE) {
                WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len);
                dev->net->stats.tx_dropped++;
-               dev_kfree_skb_irq(skb);
+               dev_kfree_skb_any(skb);
                return NETDEV_TX_OK;
        }
 
@@ -443,12 +442,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
                dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n",
                        __func__, retval);
                dev->net->stats.tx_errors++;
-               dev_kfree_skb_irq(skb);
+               dev_kfree_skb_any(skb);
        } else {
-               dev->tx_skb = skb;
-
                dev->net->stats.tx_packets++;
                dev->net->stats.tx_bytes += skb->len;
+               dev_consume_skb_any(skb);
                netif_stop_queue(net);
        }
 
index 4bce5ae65a55c193ec84c0d64137adde497daf6b..254065271867146bc91db048116371ac5d3b2867 100644 (file)
@@ -4518,8 +4518,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
 {
        struct qeth_ipa_cmd *cmd;
        struct qeth_arp_query_info *qinfo;
-       struct qeth_snmp_cmd *snmp;
        unsigned char *data;
+       void *snmp_data;
        __u16 data_len;
 
        QETH_CARD_TEXT(card, 3, "snpcmdcb");
@@ -4527,7 +4527,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
        cmd = (struct qeth_ipa_cmd *) sdata;
        data = (unsigned char *)((char *)cmd - reply->offset);
        qinfo = (struct qeth_arp_query_info *) reply->param;
-       snmp = &cmd->data.setadapterparms.data.snmp;
 
        if (cmd->hdr.return_code) {
                QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code);
@@ -4540,10 +4539,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
                return 0;
        }
        data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data));
-       if (cmd->data.setadapterparms.hdr.seq_no == 1)
-               data_len -= (__u16)((char *)&snmp->data - (char *)cmd);
-       else
-               data_len -= (__u16)((char *)&snmp->request - (char *)cmd);
+       if (cmd->data.setadapterparms.hdr.seq_no == 1) {
+               snmp_data = &cmd->data.setadapterparms.data.snmp;
+               data_len -= offsetof(struct qeth_ipa_cmd,
+                                    data.setadapterparms.data.snmp);
+       } else {
+               snmp_data = &cmd->data.setadapterparms.data.snmp.request;
+               data_len -= offsetof(struct qeth_ipa_cmd,
+                                    data.setadapterparms.data.snmp.request);
+       }
 
        /* check if there is enough room in userspace */
        if ((qinfo->udata_len - qinfo->udata_offset) < data_len) {
@@ -4556,16 +4560,9 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
        QETH_CARD_TEXT_(card, 4, "sseqn%i",
                cmd->data.setadapterparms.hdr.seq_no);
        /*copy entries to user buffer*/
-       if (cmd->data.setadapterparms.hdr.seq_no == 1) {
-               memcpy(qinfo->udata + qinfo->udata_offset,
-                      (char *)snmp,
-                      data_len + offsetof(struct qeth_snmp_cmd, data));
-               qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data);
-       } else {
-               memcpy(qinfo->udata + qinfo->udata_offset,
-                      (char *)&snmp->request, data_len);
-       }
+       memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len);
        qinfo->udata_offset += data_len;
+
        /* check if all replies received ... */
                QETH_CARD_TEXT_(card, 4, "srtot%i",
                               cmd->data.setadapterparms.hdr.used_total);
index de629b706d1d7dda246819ed37109f098c05785e..448dcc448f1fe59cb14de60969f71f8ba6c35278 100644 (file)
@@ -866,6 +866,10 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr);
 
 void bpf_jit_free(struct bpf_prog *fp);
 
+int bpf_jit_get_func_addr(const struct bpf_prog *prog,
+                         const struct bpf_insn *insn, bool extra_pass,
+                         u64 *func_addr, bool *func_addr_fixed);
+
 struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp);
 void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
 
index b8d95564bd53481acf0bd78b15a140e8ac761776..14edb795ab43045939f9976105cd9060603ea566 100644 (file)
@@ -21,6 +21,19 @@ struct nf_ct_gre_keymap {
        struct nf_conntrack_tuple tuple;
 };
 
+enum grep_conntrack {
+       GRE_CT_UNREPLIED,
+       GRE_CT_REPLIED,
+       GRE_CT_MAX
+};
+
+struct netns_proto_gre {
+       struct nf_proto_net     nf;
+       rwlock_t                keymap_lock;
+       struct list_head        keymap_list;
+       unsigned int            gre_timeouts[GRE_CT_MAX];
+};
+
 /* add new tuple->key_reply pair to keymap */
 int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
                         struct nf_conntrack_tuple *t);
index cd24be4c4a99bd633f5718c822920f1aa44b7f12..13d55206bb9fccee2d669aec6a8c09ff92beaf8d 100644 (file)
@@ -9,7 +9,7 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
                       const struct nf_nat_range2 *range,
                       const struct net_device *out);
 
-void nf_nat_masquerade_ipv4_register_notifier(void);
+int nf_nat_masquerade_ipv4_register_notifier(void);
 void nf_nat_masquerade_ipv4_unregister_notifier(void);
 
 #endif /*_NF_NAT_MASQUERADE_IPV4_H_ */
index 0c3b5ebf0bb8d4832322ed16c1c3a106712d73d3..2917bf95c4370db57fcc64a6bb76fd64ad1ef902 100644 (file)
@@ -5,7 +5,7 @@
 unsigned int
 nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
                       const struct net_device *out);
-void nf_nat_masquerade_ipv6_register_notifier(void);
+int nf_nat_masquerade_ipv6_register_notifier(void);
 void nf_nat_masquerade_ipv6_unregister_notifier(void);
 
 #endif /* _NF_NAT_MASQUERADE_IPV6_H_ */
index 1a796e0799ec4a524aee5c325734f0ab22a410ac..b1a3545d0ec89f747d1cd51b7140fd64ad2fe6fd 100644 (file)
@@ -672,6 +672,40 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
        bpf_prog_unlock_free(fp);
 }
 
+int bpf_jit_get_func_addr(const struct bpf_prog *prog,
+                         const struct bpf_insn *insn, bool extra_pass,
+                         u64 *func_addr, bool *func_addr_fixed)
+{
+       s16 off = insn->off;
+       s32 imm = insn->imm;
+       u8 *addr;
+
+       *func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
+       if (!*func_addr_fixed) {
+               /* Place-holder address till the last pass has collected
+                * all addresses for JITed subprograms in which case we
+                * can pick them up from prog->aux.
+                */
+               if (!extra_pass)
+                       addr = NULL;
+               else if (prog->aux->func &&
+                        off >= 0 && off < prog->aux->func_cnt)
+                       addr = (u8 *)prog->aux->func[off]->bpf_func;
+               else
+                       return -EINVAL;
+       } else {
+               /* Address of a BPF helper call. Since part of the core
+                * kernel, it's always at a fixed location. __bpf_call_base
+                * and the helper with imm relative to it are both in core
+                * kernel.
+                */
+               addr = (u8 *)__bpf_call_base + imm;
+       }
+
+       *func_addr = (unsigned long)addr;
+       return 0;
+}
+
 static int bpf_jit_blind_insn(const struct bpf_insn *from,
                              const struct bpf_insn *aux,
                              struct bpf_insn *to_buff)
index c97a8f968638c6da0c2ec32c591753f69af59e15..bed9d48a7ae9582928c0ab4c59dd931fea61f1bd 100644 (file)
@@ -139,7 +139,8 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
                return -ENOENT;
 
        new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
-                          map->value_size, __GFP_ZERO | GFP_USER,
+                          map->value_size,
+                          __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
                           map->numa_node);
        if (!new)
                return -ENOMEM;
index 8bbd72d3a121f4e1cb75effd9c5fe5b96728a6b5..b384ea9f3254987f1caa16dff0780900720d93ca 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/bpf.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/capability.h>
 #include "percpu_freelist.h"
 
 #define QUEUE_STACK_CREATE_FLAG_MASK \
@@ -45,8 +46,12 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
 /* Called from syscall */
 static int queue_stack_map_alloc_check(union bpf_attr *attr)
 {
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
        /* check sanity of attributes */
        if (attr->max_entries == 0 || attr->key_size != 0 ||
+           attr->value_size == 0 ||
            attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
                return -EINVAL;
 
@@ -63,15 +68,10 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
 {
        int ret, numa_node = bpf_map_attr_numa_node(attr);
        struct bpf_queue_stack *qs;
-       u32 size, value_size;
-       u64 queue_size, cost;
-
-       size = attr->max_entries + 1;
-       value_size = attr->value_size;
-
-       queue_size = sizeof(*qs) + (u64) value_size * size;
+       u64 size, queue_size, cost;
 
-       cost = queue_size;
+       size = (u64) attr->max_entries + 1;
+       cost = queue_size = sizeof(*qs) + size * attr->value_size;
        if (cost >= U32_MAX - PAGE_SIZE)
                return ERR_PTR(-E2BIG);
 
index 1971ca325fb4e765be7156299e7f02983687019f..6dd419550aba4b98a45ee1e5b1cf6fde4bd7c354 100644 (file)
@@ -5650,7 +5650,7 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len
                return;
        /* NOTE: fake 'exit' subprog should be updated as well. */
        for (i = 0; i <= env->subprog_cnt; i++) {
-               if (env->subprog_info[i].start < off)
+               if (env->subprog_info[i].start <= off)
                        continue;
                env->subprog_info[i].start += len - 1;
        }
index 08fcfe440c6374e336b02c1dde33a478fc58615a..9864a35c8bb576e30655bccf62e5174cb1591072 100644 (file)
@@ -196,11 +196,13 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
                        i++;
                } else if (fmt[i] == 'p' || fmt[i] == 's') {
                        mod[fmt_cnt]++;
-                       i++;
-                       if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
+                       /* disallow any further format extensions */
+                       if (fmt[i + 1] != 0 &&
+                           !isspace(fmt[i + 1]) &&
+                           !ispunct(fmt[i + 1]))
                                return -EINVAL;
                        fmt_cnt++;
-                       if (fmt[i - 1] == 's') {
+                       if (fmt[i] == 's') {
                                if (str_seen)
                                        /* allow only one '%s' per fmt string */
                                        return -EINVAL;
index e521c5ebc7d11cdfdcc10307ad973bcac2d1602a..9a1327eb25faf2c73a37d253f88b19c6b92414d6 100644 (file)
@@ -4852,18 +4852,17 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
        } else {
                struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
                struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
-               u16 hnum = ntohs(tuple->ipv6.dport);
                int sdif = inet6_sdif(skb);
 
                if (proto == IPPROTO_TCP)
                        sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
                                            src6, tuple->ipv6.sport,
-                                           dst6, hnum,
+                                           dst6, ntohs(tuple->ipv6.dport),
                                            dif, sdif, &refcounted);
                else if (likely(ipv6_bpf_stub))
                        sk = ipv6_bpf_stub->udp6_lib_lookup(net,
                                                            src6, tuple->ipv6.sport,
-                                                           dst6, hnum,
+                                                           dst6, tuple->ipv6.dport,
                                                            dif, sdif,
                                                            &udp_table, skb);
 #endif
index c09219e7f23048636836d52daf748bf7e81369e1..5dbec21856f4ce458af136bfe5ef1c0c84a4f8a5 100644 (file)
@@ -939,7 +939,7 @@ static int __ip_append_data(struct sock *sk,
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
-                       unsigned int pagedlen = 0;
+                       unsigned int pagedlen;
                        struct sk_buff *skb_prev;
 alloc_new_skb:
                        skb_prev = skb;
@@ -956,6 +956,7 @@ alloc_new_skb:
                        if (datalen > mtu - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;
                        fraglen = datalen + fragheaderlen;
+                       pagedlen = 0;
 
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features&NETIF_F_SG))
index ce1512b02cb203a549529967eb602b467644a2d5..fd3f9e8a74daf4954d675eaf2cc381196facc4f4 100644 (file)
@@ -81,9 +81,12 @@ static int __init masquerade_tg_init(void)
        int ret;
 
        ret = xt_register_target(&masquerade_tg_reg);
+       if (ret)
+               return ret;
 
-       if (ret == 0)
-               nf_nat_masquerade_ipv4_register_notifier();
+       ret = nf_nat_masquerade_ipv4_register_notifier();
+       if (ret)
+               xt_unregister_target(&masquerade_tg_reg);
 
        return ret;
 }
index a9d5e013e5556a5bace7afcb61cabeb0849261d1..41327bb990932bd8f95fe5b357fac4c0134866e9 100644 (file)
@@ -147,28 +147,50 @@ static struct notifier_block masq_inet_notifier = {
        .notifier_call  = masq_inet_event,
 };
 
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
 
-void nf_nat_masquerade_ipv4_register_notifier(void)
+int nf_nat_masquerade_ipv4_register_notifier(void)
 {
+       int ret = 0;
+
+       mutex_lock(&masq_mutex);
        /* check if the notifier was already set */
-       if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
-               return;
+       if (++masq_refcnt > 1)
+               goto out_unlock;
 
        /* Register for device down reports */
-       register_netdevice_notifier(&masq_dev_notifier);
+       ret = register_netdevice_notifier(&masq_dev_notifier);
+       if (ret)
+               goto err_dec;
        /* Register IP address change reports */
-       register_inetaddr_notifier(&masq_inet_notifier);
+       ret = register_inetaddr_notifier(&masq_inet_notifier);
+       if (ret)
+               goto err_unregister;
+
+       mutex_unlock(&masq_mutex);
+       return ret;
+
+err_unregister:
+       unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+       masq_refcnt--;
+out_unlock:
+       mutex_unlock(&masq_mutex);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
 
 void nf_nat_masquerade_ipv4_unregister_notifier(void)
 {
+       mutex_lock(&masq_mutex);
        /* check if the notifier still has clients */
-       if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
-               return;
+       if (--masq_refcnt > 0)
+               goto out_unlock;
 
        unregister_netdevice_notifier(&masq_dev_notifier);
        unregister_inetaddr_notifier(&masq_inet_notifier);
+out_unlock:
+       mutex_unlock(&masq_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
index f1193e1e928aa14aaa88371d78558a46ed9c9d59..6847de1d1db8a7e4c86dd27f5c4310df4627c296 100644 (file)
@@ -69,7 +69,9 @@ static int __init nft_masq_ipv4_module_init(void)
        if (ret < 0)
                return ret;
 
-       nf_nat_masquerade_ipv4_register_notifier();
+       ret = nf_nat_masquerade_ipv4_register_notifier();
+       if (ret)
+               nft_unregister_expr(&nft_masq_ipv4_type);
 
        return ret;
 }
index 1e37c13881893c8a73c0fcaa82a9979d3b5e1798..a9d9555a973fed4e3562a57d1a2cdadfef40dae4 100644 (file)
@@ -579,10 +579,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
                u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
                u32 delta_us;
 
-               if (!delta)
-                       delta = 1;
-               delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
-               tcp_rcv_rtt_update(tp, delta_us, 0);
+               if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+                       if (!delta)
+                               delta = 1;
+                       delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+                       tcp_rcv_rtt_update(tp, delta_us, 0);
+               }
        }
 }
 
@@ -2910,9 +2912,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
        if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
            flag & FLAG_ACKED) {
                u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
-               u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
 
-               seq_rtt_us = ca_rtt_us = delta_us;
+               if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+                       seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+                       ca_rtt_us = seq_rtt_us;
+               }
        }
        rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
        if (seq_rtt_us < 0)
index 5f8b6d3cd855dc639409e69d84ade5bb2be51626..091c53925e4da6b2b154d166682a0ac0aefd7ecb 100644 (file)
@@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        u32 elapsed, start_ts;
+       s32 remaining;
 
        start_ts = tcp_retransmit_stamp(sk);
        if (!icsk->icsk_user_timeout || !start_ts)
                return icsk->icsk_rto;
        elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
-       if (elapsed >= icsk->icsk_user_timeout)
+       remaining = icsk->icsk_user_timeout - elapsed;
+       if (remaining <= 0)
                return 1; /* user timeout has passed; fire ASAP */
-       else
-               return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+
+       return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
 }
 
 /**
@@ -209,7 +211,7 @@ static bool retransmits_timed_out(struct sock *sk,
                                (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
                timeout = jiffies_to_msecs(timeout);
        }
-       return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
+       return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
 }
 
 /* A write timeout has occurred. Process the after effects. */
index 89e0d5118afe69a94c93e1f584047240fc0d1a51..827a3f5ff3bbdff2e66e09bb90b5b2195b9ef297 100644 (file)
@@ -1354,7 +1354,7 @@ emsgsize:
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
-                       unsigned int pagedlen = 0;
+                       unsigned int pagedlen;
 alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
@@ -1378,6 +1378,7 @@ alloc_new_skb:
                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;
+                       pagedlen = 0;
 
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features&NETIF_F_SG))
index 5ae8e1c51079cb2cb36324af300762c5463134ba..8b075f0bc35169b4098bda738950d631b62ec415 100644 (file)
@@ -24,7 +24,8 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
        unsigned int hh_len;
        struct dst_entry *dst;
        struct flowi6 fl6 = {
-               .flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
+               .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
+                       rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
                .flowi6_mark = skb->mark,
                .flowi6_uid = sock_net_uid(net, sk),
                .daddr = iph->daddr,
index 491f808e356a68046f553785836d220e287c318a..29c7f1915a96cba89c357eeaa9de0b8c29aa9f28 100644 (file)
@@ -58,8 +58,12 @@ static int __init masquerade_tg6_init(void)
        int err;
 
        err = xt_register_target(&masquerade_tg6_reg);
-       if (err == 0)
-               nf_nat_masquerade_ipv6_register_notifier();
+       if (err)
+               return err;
+
+       err = nf_nat_masquerade_ipv6_register_notifier();
+       if (err)
+               xt_unregister_target(&masquerade_tg6_reg);
 
        return err;
 }
index 3e4bf2286abea96617f8df1ecac74d91667ef59f..0ad0da5a260026ccddd96a4becbec9fa3a975e67 100644 (file)
@@ -132,8 +132,8 @@ static void iterate_cleanup_work(struct work_struct *work)
  * of ipv6 addresses being deleted), we also need to add an upper
  * limit to the number of queued work items.
  */
-static int masq_inet_event(struct notifier_block *this,
-                          unsigned long event, void *ptr)
+static int masq_inet6_event(struct notifier_block *this,
+                           unsigned long event, void *ptr)
 {
        struct inet6_ifaddr *ifa = ptr;
        const struct net_device *dev;
@@ -171,30 +171,53 @@ static int masq_inet_event(struct notifier_block *this,
        return NOTIFY_DONE;
 }
 
-static struct notifier_block masq_inet_notifier = {
-       .notifier_call  = masq_inet_event,
+static struct notifier_block masq_inet6_notifier = {
+       .notifier_call  = masq_inet6_event,
 };
 
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
 
-void nf_nat_masquerade_ipv6_register_notifier(void)
+int nf_nat_masquerade_ipv6_register_notifier(void)
 {
+       int ret = 0;
+
+       mutex_lock(&masq_mutex);
        /* check if the notifier is already set */
-       if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
-               return;
+       if (++masq_refcnt > 1)
+               goto out_unlock;
+
+       ret = register_netdevice_notifier(&masq_dev_notifier);
+       if (ret)
+               goto err_dec;
+
+       ret = register_inet6addr_notifier(&masq_inet6_notifier);
+       if (ret)
+               goto err_unregister;
 
-       register_netdevice_notifier(&masq_dev_notifier);
-       register_inet6addr_notifier(&masq_inet_notifier);
+       mutex_unlock(&masq_mutex);
+       return ret;
+
+err_unregister:
+       unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+       masq_refcnt--;
+out_unlock:
+       mutex_unlock(&masq_mutex);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
 
 void nf_nat_masquerade_ipv6_unregister_notifier(void)
 {
+       mutex_lock(&masq_mutex);
        /* check if the notifier still has clients */
-       if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
-               return;
+       if (--masq_refcnt > 0)
+               goto out_unlock;
 
-       unregister_inet6addr_notifier(&masq_inet_notifier);
+       unregister_inet6addr_notifier(&masq_inet6_notifier);
        unregister_netdevice_notifier(&masq_dev_notifier);
+out_unlock:
+       mutex_unlock(&masq_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
index dd0122f3cffea92f587f0c8a598281e77aa5c98b..e06c82e9dfcdf86c7f41ec8dc1693013b3cc4fc1 100644 (file)
@@ -70,7 +70,9 @@ static int __init nft_masq_ipv6_module_init(void)
        if (ret < 0)
                return ret;
 
-       nf_nat_masquerade_ipv6_register_notifier();
+       ret = nf_nat_masquerade_ipv6_register_notifier();
+       if (ret)
+               nft_unregister_expr(&nft_masq_ipv6_type);
 
        return ret;
 }
index 83395bf6dc35e2a3ea486246e98de99b6e1094da..432141f04af3d98b356332ac9723b7a286f93538 100644 (file)
@@ -3980,6 +3980,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
 
 static struct notifier_block ip_vs_dst_notifier = {
        .notifier_call = ip_vs_dst_event,
+#ifdef CONFIG_IP_VS_IPV6
+       .priority = ADDRCONF_NOTIFY_PRIORITY + 5,
+#endif
 };
 
 int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
index 02ca7df793f5c07233924c051bbbd9faf60854d4..b6d0f6deea86c34437f997533df3f4f7f4f6bd50 100644 (file)
@@ -49,6 +49,7 @@ struct nf_conncount_tuple {
        struct nf_conntrack_zone        zone;
        int                             cpu;
        u32                             jiffies32;
+       bool                            dead;
        struct rcu_head                 rcu_head;
 };
 
@@ -106,15 +107,16 @@ nf_conncount_add(struct nf_conncount_list *list,
        conn->zone = *zone;
        conn->cpu = raw_smp_processor_id();
        conn->jiffies32 = (u32)jiffies;
-       spin_lock(&list->list_lock);
+       conn->dead = false;
+       spin_lock_bh(&list->list_lock);
        if (list->dead == true) {
                kmem_cache_free(conncount_conn_cachep, conn);
-               spin_unlock(&list->list_lock);
+               spin_unlock_bh(&list->list_lock);
                return NF_CONNCOUNT_SKIP;
        }
        list_add_tail(&conn->node, &list->head);
        list->count++;
-       spin_unlock(&list->list_lock);
+       spin_unlock_bh(&list->list_lock);
        return NF_CONNCOUNT_ADDED;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_add);
@@ -132,19 +134,22 @@ static bool conn_free(struct nf_conncount_list *list,
 {
        bool free_entry = false;
 
-       spin_lock(&list->list_lock);
+       spin_lock_bh(&list->list_lock);
 
-       if (list->count == 0) {
-               spin_unlock(&list->list_lock);
-                return free_entry;
+       if (conn->dead) {
+               spin_unlock_bh(&list->list_lock);
+               return free_entry;
        }
 
        list->count--;
+       conn->dead = true;
        list_del_rcu(&conn->node);
-       if (list->count == 0)
+       if (list->count == 0) {
+               list->dead = true;
                free_entry = true;
+       }
 
-       spin_unlock(&list->list_lock);
+       spin_unlock_bh(&list->list_lock);
        call_rcu(&conn->rcu_head, __conn_free);
        return free_entry;
 }
@@ -245,7 +250,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
 {
        spin_lock_init(&list->list_lock);
        INIT_LIST_HEAD(&list->head);
-       list->count = 1;
+       list->count = 0;
        list->dead = false;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_list_init);
@@ -259,6 +264,7 @@ bool nf_conncount_gc_list(struct net *net,
        struct nf_conn *found_ct;
        unsigned int collected = 0;
        bool free_entry = false;
+       bool ret = false;
 
        list_for_each_entry_safe(conn, conn_n, &list->head, node) {
                found = find_or_evict(net, list, conn, &free_entry);
@@ -288,7 +294,15 @@ bool nf_conncount_gc_list(struct net *net,
                if (collected > CONNCOUNT_GC_MAX_NODES)
                        return false;
        }
-       return false;
+
+       spin_lock_bh(&list->list_lock);
+       if (!list->count) {
+               list->dead = true;
+               ret = true;
+       }
+       spin_unlock_bh(&list->list_lock);
+
+       return ret;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
 
@@ -309,11 +323,8 @@ static void tree_nodes_free(struct rb_root *root,
        while (gc_count) {
                rbconn = gc_nodes[--gc_count];
                spin_lock(&rbconn->list.list_lock);
-               if (rbconn->list.count == 0 && rbconn->list.dead == false) {
-                       rbconn->list.dead = true;
-                       rb_erase(&rbconn->node, root);
-                       call_rcu(&rbconn->rcu_head, __tree_nodes_free);
-               }
+               rb_erase(&rbconn->node, root);
+               call_rcu(&rbconn->rcu_head, __tree_nodes_free);
                spin_unlock(&rbconn->list.list_lock);
        }
 }
@@ -414,6 +425,7 @@ insert_tree(struct net *net,
        nf_conncount_list_init(&rbconn->list);
        list_add(&conn->node, &rbconn->list.head);
        count = 1;
+       rbconn->list.count = count;
 
        rb_link_node(&rbconn->node, parent, rbnode);
        rb_insert_color(&rbconn->node, root);
index 9b48dc8b4b885a00d8806038fc5fd0948e60cbca..2a5e56c6d8d9f966be97de8ae9153cf3be260c5b 100644 (file)
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 
-enum grep_conntrack {
-       GRE_CT_UNREPLIED,
-       GRE_CT_REPLIED,
-       GRE_CT_MAX
-};
-
 static const unsigned int gre_timeouts[GRE_CT_MAX] = {
        [GRE_CT_UNREPLIED]      = 30*HZ,
        [GRE_CT_REPLIED]        = 180*HZ,
 };
 
 static unsigned int proto_gre_net_id __read_mostly;
-struct netns_proto_gre {
-       struct nf_proto_net     nf;
-       rwlock_t                keymap_lock;
-       struct list_head        keymap_list;
-       unsigned int            gre_timeouts[GRE_CT_MAX];
-};
 
 static inline struct netns_proto_gre *gre_pernet(struct net *net)
 {
@@ -402,6 +390,8 @@ static int __init nf_ct_proto_gre_init(void)
 {
        int ret;
 
+       BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0);
+
        ret = register_pernet_subsys(&proto_gre_net_ops);
        if (ret < 0)
                goto out_pernet;
index 42487d01a3eda2306f78fde4b45919e2e06e941a..2e61aab6ed731356e34df28a6c1c8d41659ad749 100644 (file)
@@ -2457,7 +2457,7 @@ err:
 static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
                                   struct nft_rule *rule)
 {
-       struct nft_expr *expr;
+       struct nft_expr *expr, *next;
 
        /*
         * Careful: some expressions might not be initialized in case this
@@ -2465,8 +2465,9 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
         */
        expr = nft_expr_first(rule);
        while (expr != nft_expr_last(rule) && expr->ops) {
+               next = nft_expr_next(expr);
                nf_tables_expr_destroy(ctx, expr);
-               expr = nft_expr_next(expr);
+               expr = next;
        }
        kfree(rule);
 }
@@ -2589,17 +2590,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 
                if (chain->use == UINT_MAX)
                        return -EOVERFLOW;
-       }
-
-       if (nla[NFTA_RULE_POSITION]) {
-               if (!(nlh->nlmsg_flags & NLM_F_CREATE))
-                       return -EOPNOTSUPP;
 
-               pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
-               old_rule = __nft_rule_lookup(chain, pos_handle);
-               if (IS_ERR(old_rule)) {
-                       NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
-                       return PTR_ERR(old_rule);
+               if (nla[NFTA_RULE_POSITION]) {
+                       pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+                       old_rule = __nft_rule_lookup(chain, pos_handle);
+                       if (IS_ERR(old_rule)) {
+                               NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
+                               return PTR_ERR(old_rule);
+                       }
                }
        }
 
@@ -2669,21 +2667,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
        }
 
        if (nlh->nlmsg_flags & NLM_F_REPLACE) {
-               if (!nft_is_active_next(net, old_rule)) {
-                       err = -ENOENT;
-                       goto err2;
-               }
-               trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
-                                          old_rule);
+               trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
                if (trans == NULL) {
                        err = -ENOMEM;
                        goto err2;
                }
-               nft_deactivate_next(net, old_rule);
-               chain->use--;
-
-               if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
-                       err = -ENOMEM;
+               err = nft_delrule(&ctx, old_rule);
+               if (err < 0) {
+                       nft_trans_destroy(trans);
                        goto err2;
                }
 
@@ -6324,7 +6315,7 @@ static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
        call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
 }
 
-static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *chain)
+static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
 {
        struct nft_rule **g0, **g1;
        bool next_genbit;
@@ -6441,11 +6432,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
        /* step 2.  Make rules_gen_X visible to packet path */
        list_for_each_entry(table, &net->nft.tables, list) {
-               list_for_each_entry(chain, &table->chains, list) {
-                       if (!nft_is_active_next(net, chain))
-                               continue;
-                       nf_tables_commit_chain_active(net, chain);
-               }
+               list_for_each_entry(chain, &table->chains, list)
+                       nf_tables_commit_chain(net, chain);
        }
 
        /*
index a518eb162344e6692e69989cf5ba0cdf03da1333..109b0d27345acc1afac24524c107043d4d10958a 100644 (file)
@@ -455,7 +455,8 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
        case IPPROTO_TCP:
                timeouts = nf_tcp_pernet(net)->timeouts;
                break;
-       case IPPROTO_UDP:
+       case IPPROTO_UDP: /* fallthrough */
+       case IPPROTO_UDPLITE:
                timeouts = nf_udp_pernet(net)->timeouts;
                break;
        case IPPROTO_DCCP:
@@ -469,13 +470,23 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
        case IPPROTO_SCTP:
 #ifdef CONFIG_NF_CT_PROTO_SCTP
                timeouts = nf_sctp_pernet(net)->timeouts;
+#endif
+               break;
+       case IPPROTO_GRE:
+#ifdef CONFIG_NF_CT_PROTO_GRE
+               if (l4proto->net_id) {
+                       struct netns_proto_gre *net_gre;
+
+                       net_gre = net_generic(net, *l4proto->net_id);
+                       timeouts = net_gre->gre_timeouts;
+               }
 #endif
                break;
        case 255:
                timeouts = &nf_generic_pernet(net)->timeout;
                break;
        default:
-               WARN_ON_ONCE(1);
+               WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
                break;
        }
 
index 9d0ede4742240f544bbdff40553b73b8e755c7b4..7334e0b80a5effe521bb807dda2ab259a62df8d8 100644 (file)
@@ -520,6 +520,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
                    void *info)
 {
        struct xt_match *match = expr->ops->data;
+       struct module *me = match->me;
        struct xt_mtdtor_param par;
 
        par.net = ctx->net;
@@ -530,7 +531,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
                par.match->destroy(&par);
 
        if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
-               module_put(match->me);
+               module_put(me);
 }
 
 static void
index e82d9a966c45a5fc026b4ac939af294f4f57730b..974525eb92df7246c2e3acbc4cec495dcfd31862 100644 (file)
@@ -214,7 +214,9 @@ static int __init nft_flow_offload_module_init(void)
 {
        int err;
 
-       register_netdevice_notifier(&flow_offload_netdev_notifier);
+       err = register_netdevice_notifier(&flow_offload_netdev_notifier);
+       if (err)
+               goto err;
 
        err = nft_register_expr(&nft_flow_offload_type);
        if (err < 0)
@@ -224,6 +226,7 @@ static int __init nft_flow_offload_module_init(void)
 
 register_expr:
        unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+err:
        return err;
 }
 
index dec843cadf462667ae015c146d0c606808d57e65..9e05c86ba5c452f201cf1b1f7d7d59808b994b8a 100644 (file)
@@ -201,18 +201,8 @@ static __net_init int xt_rateest_net_init(struct net *net)
        return 0;
 }
 
-static void __net_exit xt_rateest_net_exit(struct net *net)
-{
-       struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
-               WARN_ON_ONCE(!hlist_empty(&xn->hash[i]));
-}
-
 static struct pernet_operations xt_rateest_net_ops = {
        .init = xt_rateest_net_init,
-       .exit = xt_rateest_net_exit,
        .id   = &xt_rateest_id,
        .size = sizeof(struct xt_rateest_net),
 };
index 3e7d259e5d8de01148729022f55c44310f7d6ed7..1ad4017f9b7349849d845c9e5bb9a5592533a291 100644 (file)
@@ -295,9 +295,10 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
 
        /* copy match config into hashtable config */
        ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
-
-       if (ret)
+       if (ret) {
+               vfree(hinfo);
                return ret;
+       }
 
        hinfo->cfg.size = size;
        if (hinfo->cfg.max == 0)
@@ -814,7 +815,6 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
        int ret;
 
        ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
-
        if (ret)
                return ret;
 
@@ -830,7 +830,6 @@ hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
        int ret;
 
        ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
-
        if (ret)
                return ret;
 
@@ -921,7 +920,6 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
                return ret;
 
        ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
-
        if (ret)
                return ret;
 
@@ -940,7 +938,6 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
                return ret;
 
        ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
-
        if (ret)
                return ret;
 
index b0e74a3e77ec507c87a54ecc10dbce1c6a7b554b..025f48e14a91f19facf557acb27ee45c0dee46c8 100644 (file)
@@ -410,6 +410,7 @@ static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
        head->truesize += skb->truesize;
        head->data_len += skb->len;
        head->len += skb->len;
+       refcount_add(skb->truesize, &head->sk->sk_wmem_alloc);
 
        __skb_header_release(skb);
 }
index 2afc4f8c37a74db4896508283f434909a0151732..48801976643358efd72623e8c5de56cc77202afd 100644 (file)
@@ -584,12 +584,15 @@ static void  tipc_node_clear_links(struct tipc_node *node)
 /* tipc_node_cleanup - delete nodes that does not
  * have active links for NODE_CLEANUP_AFTER time
  */
-static int tipc_node_cleanup(struct tipc_node *peer)
+static bool tipc_node_cleanup(struct tipc_node *peer)
 {
        struct tipc_net *tn = tipc_net(peer->net);
        bool deleted = false;
 
-       spin_lock_bh(&tn->node_list_lock);
+       /* If lock held by tipc_node_stop() the node will be deleted anyway */
+       if (!spin_trylock_bh(&tn->node_list_lock))
+               return false;
+
        tipc_node_write_lock(peer);
 
        if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) {
index edbe81534c6d2941b955cd0ab15cf845110fb130..d07ccf8a23f7170be674d12a08957d673a560f77 100644 (file)
@@ -137,4 +137,10 @@ EXAMPLES
 
 SEE ALSO
 ========
-       **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
+       **bpf**\ (2),
+       **bpf-helpers**\ (7),
+       **bpftool**\ (8),
+       **bpftool-prog**\ (8),
+       **bpftool-map**\ (8),
+       **bpftool-net**\ (8),
+       **bpftool-perf**\ (8)
index f55a2daed59b7e8dead1a1fc429dd6a097f2f83f..7bb787cfa97145eb1bede4ea2057903d6587398f 100644 (file)
@@ -171,4 +171,10 @@ The following three commands are equivalent:
 
 SEE ALSO
 ========
-       **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
+       **bpf**\ (2),
+       **bpf-helpers**\ (7),
+       **bpftool**\ (8),
+       **bpftool-prog**\ (8),
+       **bpftool-cgroup**\ (8),
+       **bpftool-net**\ (8),
+       **bpftool-perf**\ (8)
index 408ec30d88726206b3bad8436ec8c9abdd11d806..ed87c9b619adc889310e7795bcfcc4b790251827 100644 (file)
@@ -136,4 +136,10 @@ EXAMPLES
 
 SEE ALSO
 ========
-       **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
+       **bpf**\ (2),
+       **bpf-helpers**\ (7),
+       **bpftool**\ (8),
+       **bpftool-prog**\ (8),
+       **bpftool-map**\ (8),
+       **bpftool-cgroup**\ (8),
+       **bpftool-perf**\ (8)
index e3eb0eab76419427509b213e00c6458e68ff61fc..f4c5e5538bb8e516624b0f28f749bf613c5a477e 100644 (file)
@@ -78,4 +78,10 @@ EXAMPLES
 
 SEE ALSO
 ========
-       **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
+       **bpf**\ (2),
+       **bpf-helpers**\ (7),
+       **bpftool**\ (8),
+       **bpftool-prog**\ (8),
+       **bpftool-map**\ (8),
+       **bpftool-cgroup**\ (8),
+       **bpftool-net**\ (8)
index ac4e904b10fbd9b4b504d943f62aef0be68f0829..ecf618807125d9af832ec1ec969047c060393620 100644 (file)
@@ -124,7 +124,8 @@ OPTIONS
                  Generate human-readable JSON output. Implies **-j**.
 
        -f, --bpffs
-                 Show file names of pinned programs.
+                 When showing BPF programs, show file names of pinned
+                 programs.
 
 EXAMPLES
 ========
@@ -206,4 +207,10 @@ EXAMPLES
 
 SEE ALSO
 ========
-       **bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8)
+       **bpf**\ (2),
+       **bpf-helpers**\ (7),
+       **bpftool**\ (8),
+       **bpftool-map**\ (8),
+       **bpftool-cgroup**\ (8),
+       **bpftool-net**\ (8),
+       **bpftool-perf**\ (8)
index 04cd4f92ab89c9dd8009180c3e4601c7b1956d1e..129b7a9c0f9bce5ac78f49d6678e39c8c6fbaa8b 100644 (file)
@@ -63,5 +63,10 @@ OPTIONS
 
 SEE ALSO
 ========
-       **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
-        **bpftool-perf**\ (8), **bpftool-net**\ (8)
+       **bpf**\ (2),
+       **bpf-helpers**\ (7),
+       **bpftool-prog**\ (8),
+       **bpftool-map**\ (8),
+       **bpftool-cgroup**\ (8),
+       **bpftool-net**\ (8),
+       **bpftool-perf**\ (8)
index 25af85304ebee3b073d9e7d0036038c5c4c05d68..70fd48d79f611fd98666a0ecf156c00285007fd9 100644 (file)
@@ -130,16 +130,17 @@ static int mnt_bpffs(const char *target, char *buff, size_t bufflen)
        return 0;
 }
 
-int open_obj_pinned(char *path)
+int open_obj_pinned(char *path, bool quiet)
 {
        int fd;
 
        fd = bpf_obj_get(path);
        if (fd < 0) {
-               p_err("bpf obj get (%s): %s", path,
-                     errno == EACCES && !is_bpffs(dirname(path)) ?
-                   "directory not in bpf file system (bpffs)" :
-                   strerror(errno));
+               if (!quiet)
+                       p_err("bpf obj get (%s): %s", path,
+                             errno == EACCES && !is_bpffs(dirname(path)) ?
+                           "directory not in bpf file system (bpffs)" :
+                           strerror(errno));
                return -1;
        }
 
@@ -151,7 +152,7 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
        enum bpf_obj_type type;
        int fd;
 
-       fd = open_obj_pinned(path);
+       fd = open_obj_pinned(path, false);
        if (fd < 0)
                return -1;
 
@@ -304,7 +305,7 @@ char *get_fdinfo(int fd, const char *key)
                return NULL;
        }
 
-       while ((n = getline(&line, &line_n, fdi))) {
+       while ((n = getline(&line, &line_n, fdi)) > 0) {
                char *value;
                int len;
 
@@ -384,7 +385,7 @@ int build_pinned_obj_table(struct pinned_obj_table *tab,
                while ((ftse = fts_read(fts))) {
                        if (!(ftse->fts_info & FTS_F))
                                continue;
-                       fd = open_obj_pinned(ftse->fts_path);
+                       fd = open_obj_pinned(ftse->fts_path, true);
                        if (fd < 0)
                                continue;
 
index 28322ace285653f91a9fe42e69a29483b0a1280b..a8bf1e2d9818debfc200701f21d438d432e093fc 100644 (file)
@@ -127,7 +127,7 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
 int get_fd_type(int fd);
 const char *get_fd_type_name(enum bpf_obj_type type);
 char *get_fdinfo(int fd, const char *key);
-int open_obj_pinned(char *path);
+int open_obj_pinned(char *path, bool quiet);
 int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
 int do_pin_fd(int fd, const char *name);
index 5302ee282409eb039458447ad40215f24516c48a..ccee180dfb761248f078ffa4b2154793d0308420 100644 (file)
@@ -357,10 +357,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
        if (!hash_empty(prog_table.table)) {
                struct pinned_obj *obj;
 
-               printf("\n");
                hash_for_each_possible(prog_table.table, obj, hash, info->id) {
                        if (obj->id == info->id)
-                               printf("\tpinned %s\n", obj->path);
+                               printf("\n\tpinned %s", obj->path);
                }
        }
 
@@ -845,6 +844,7 @@ static int do_load(int argc, char **argv)
                        }
                        NEXT_ARG();
                } else if (is_prefix(*argv, "map")) {
+                       void *new_map_replace;
                        char *endptr, *name;
                        int fd;
 
@@ -878,12 +878,15 @@ static int do_load(int argc, char **argv)
                        if (fd < 0)
                                goto err_free_reuse_maps;
 
-                       map_replace = reallocarray(map_replace, old_map_fds + 1,
-                                                  sizeof(*map_replace));
-                       if (!map_replace) {
+                       new_map_replace = reallocarray(map_replace,
+                                                      old_map_fds + 1,
+                                                      sizeof(*map_replace));
+                       if (!new_map_replace) {
                                p_err("mem alloc failed");
                                goto err_free_reuse_maps;
                        }
+                       map_replace = new_map_replace;
+
                        map_replace[old_map_fds].idx = idx;
                        map_replace[old_map_fds].name = name;
                        map_replace[old_map_fds].fd = fd;
diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h
new file mode 100644 (file)
index 0000000..401d0c1
--- /dev/null
@@ -0,0 +1,612 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_PKT_CLS_H
+#define __LINUX_PKT_CLS_H
+
+#include <linux/types.h>
+#include <linux/pkt_sched.h>
+
+#define TC_COOKIE_MAX_SIZE 16
+
+/* Action attributes */
+enum {
+       TCA_ACT_UNSPEC,
+       TCA_ACT_KIND,
+       TCA_ACT_OPTIONS,
+       TCA_ACT_INDEX,
+       TCA_ACT_STATS,
+       TCA_ACT_PAD,
+       TCA_ACT_COOKIE,
+       __TCA_ACT_MAX
+};
+
+#define TCA_ACT_MAX __TCA_ACT_MAX
+#define TCA_OLD_COMPAT (TCA_ACT_MAX+1)
+#define TCA_ACT_MAX_PRIO 32
+#define TCA_ACT_BIND   1
+#define TCA_ACT_NOBIND 0
+#define TCA_ACT_UNBIND 1
+#define TCA_ACT_NOUNBIND       0
+#define TCA_ACT_REPLACE                1
+#define TCA_ACT_NOREPLACE      0
+
+#define TC_ACT_UNSPEC  (-1)
+#define TC_ACT_OK              0
+#define TC_ACT_RECLASSIFY      1
+#define TC_ACT_SHOT            2
+#define TC_ACT_PIPE            3
+#define TC_ACT_STOLEN          4
+#define TC_ACT_QUEUED          5
+#define TC_ACT_REPEAT          6
+#define TC_ACT_REDIRECT                7
+#define TC_ACT_TRAP            8 /* For hw path, this means "trap to cpu"
+                                  * and don't further process the frame
+                                  * in hardware. For sw path, this is
+                                  * equivalent of TC_ACT_STOLEN - drop
+                                  * the skb and act like everything
+                                  * is alright.
+                                  */
+#define TC_ACT_VALUE_MAX       TC_ACT_TRAP
+
+/* There is a special kind of actions called "extended actions",
+ * which need a value parameter. These have a local opcode located in
+ * the highest nibble, starting from 1. The rest of the bits
+ * are used to carry the value. These two parts together make
+ * a combined opcode.
+ */
+#define __TC_ACT_EXT_SHIFT 28
+#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT)
+#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1)
+#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK))
+#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode)
+
+#define TC_ACT_JUMP __TC_ACT_EXT(1)
+#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
+#define TC_ACT_EXT_OPCODE_MAX  TC_ACT_GOTO_CHAIN
+
+/* Action type identifiers*/
+enum {
+       TCA_ID_UNSPEC=0,
+       TCA_ID_POLICE=1,
+       /* other actions go here */
+       __TCA_ID_MAX=255
+};
+
+#define TCA_ID_MAX __TCA_ID_MAX
+
+struct tc_police {
+       __u32                   index;
+       int                     action;
+#define TC_POLICE_UNSPEC       TC_ACT_UNSPEC
+#define TC_POLICE_OK           TC_ACT_OK
+#define TC_POLICE_RECLASSIFY   TC_ACT_RECLASSIFY
+#define TC_POLICE_SHOT         TC_ACT_SHOT
+#define TC_POLICE_PIPE         TC_ACT_PIPE
+
+       __u32                   limit;
+       __u32                   burst;
+       __u32                   mtu;
+       struct tc_ratespec      rate;
+       struct tc_ratespec      peakrate;
+       int                     refcnt;
+       int                     bindcnt;
+       __u32                   capab;
+};
+
+struct tcf_t {
+       __u64   install;
+       __u64   lastuse;
+       __u64   expires;
+       __u64   firstuse;
+};
+
+struct tc_cnt {
+       int                   refcnt;
+       int                   bindcnt;
+};
+
+#define tc_gen \
+       __u32                 index; \
+       __u32                 capab; \
+       int                   action; \
+       int                   refcnt; \
+       int                   bindcnt
+
+enum {
+       TCA_POLICE_UNSPEC,
+       TCA_POLICE_TBF,
+       TCA_POLICE_RATE,
+       TCA_POLICE_PEAKRATE,
+       TCA_POLICE_AVRATE,
+       TCA_POLICE_RESULT,
+       TCA_POLICE_TM,
+       TCA_POLICE_PAD,
+       __TCA_POLICE_MAX
+#define TCA_POLICE_RESULT TCA_POLICE_RESULT
+};
+
+#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1)
+
+/* tca flags definitions */
+#define TCA_CLS_FLAGS_SKIP_HW  (1 << 0) /* don't offload filter to HW */
+#define TCA_CLS_FLAGS_SKIP_SW  (1 << 1) /* don't use filter in SW */
+#define TCA_CLS_FLAGS_IN_HW    (1 << 2) /* filter is offloaded to HW */
+#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */
+#define TCA_CLS_FLAGS_VERBOSE  (1 << 4) /* verbose logging */
+
+/* U32 filters */
+
+#define TC_U32_HTID(h) ((h)&0xFFF00000)
+#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20)
+#define TC_U32_HASH(h) (((h)>>12)&0xFF)
+#define TC_U32_NODE(h) ((h)&0xFFF)
+#define TC_U32_KEY(h) ((h)&0xFFFFF)
+#define TC_U32_UNSPEC  0
+#define TC_U32_ROOT    (0xFFF00000)
+
+enum {
+       TCA_U32_UNSPEC,
+       TCA_U32_CLASSID,
+       TCA_U32_HASH,
+       TCA_U32_LINK,
+       TCA_U32_DIVISOR,
+       TCA_U32_SEL,
+       TCA_U32_POLICE,
+       TCA_U32_ACT,
+       TCA_U32_INDEV,
+       TCA_U32_PCNT,
+       TCA_U32_MARK,
+       TCA_U32_FLAGS,
+       TCA_U32_PAD,
+       __TCA_U32_MAX
+};
+
+#define TCA_U32_MAX (__TCA_U32_MAX - 1)
+
+struct tc_u32_key {
+       __be32          mask;
+       __be32          val;
+       int             off;
+       int             offmask;
+};
+
+struct tc_u32_sel {
+       unsigned char           flags;
+       unsigned char           offshift;
+       unsigned char           nkeys;
+
+       __be16                  offmask;
+       __u16                   off;
+       short                   offoff;
+
+       short                   hoff;
+       __be32                  hmask;
+       struct tc_u32_key       keys[0];
+};
+
+struct tc_u32_mark {
+       __u32           val;
+       __u32           mask;
+       __u32           success;
+};
+
+struct tc_u32_pcnt {
+       __u64 rcnt;
+       __u64 rhit;
+       __u64 kcnts[0];
+};
+
+/* Flags */
+
+#define TC_U32_TERMINAL                1
+#define TC_U32_OFFSET          2
+#define TC_U32_VAROFFSET       4
+#define TC_U32_EAT             8
+
+#define TC_U32_MAXDEPTH 8
+
+
+/* RSVP filter */
+
+enum {
+       TCA_RSVP_UNSPEC,
+       TCA_RSVP_CLASSID,
+       TCA_RSVP_DST,
+       TCA_RSVP_SRC,
+       TCA_RSVP_PINFO,
+       TCA_RSVP_POLICE,
+       TCA_RSVP_ACT,
+       __TCA_RSVP_MAX
+};
+
+#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 )
+
+struct tc_rsvp_gpi {
+       __u32   key;
+       __u32   mask;
+       int     offset;
+};
+
+struct tc_rsvp_pinfo {
+       struct tc_rsvp_gpi dpi;
+       struct tc_rsvp_gpi spi;
+       __u8    protocol;
+       __u8    tunnelid;
+       __u8    tunnelhdr;
+       __u8    pad;
+};
+
+/* ROUTE filter */
+
+enum {
+       TCA_ROUTE4_UNSPEC,
+       TCA_ROUTE4_CLASSID,
+       TCA_ROUTE4_TO,
+       TCA_ROUTE4_FROM,
+       TCA_ROUTE4_IIF,
+       TCA_ROUTE4_POLICE,
+       TCA_ROUTE4_ACT,
+       __TCA_ROUTE4_MAX
+};
+
+#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1)
+
+
+/* FW filter */
+
+enum {
+       TCA_FW_UNSPEC,
+       TCA_FW_CLASSID,
+       TCA_FW_POLICE,
+       TCA_FW_INDEV, /*  used by CONFIG_NET_CLS_IND */
+       TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
+       TCA_FW_MASK,
+       __TCA_FW_MAX
+};
+
+#define TCA_FW_MAX (__TCA_FW_MAX - 1)
+
+/* TC index filter */
+
+enum {
+       TCA_TCINDEX_UNSPEC,
+       TCA_TCINDEX_HASH,
+       TCA_TCINDEX_MASK,
+       TCA_TCINDEX_SHIFT,
+       TCA_TCINDEX_FALL_THROUGH,
+       TCA_TCINDEX_CLASSID,
+       TCA_TCINDEX_POLICE,
+       TCA_TCINDEX_ACT,
+       __TCA_TCINDEX_MAX
+};
+
+#define TCA_TCINDEX_MAX     (__TCA_TCINDEX_MAX - 1)
+
+/* Flow filter */
+
+enum {
+       FLOW_KEY_SRC,
+       FLOW_KEY_DST,
+       FLOW_KEY_PROTO,
+       FLOW_KEY_PROTO_SRC,
+       FLOW_KEY_PROTO_DST,
+       FLOW_KEY_IIF,
+       FLOW_KEY_PRIORITY,
+       FLOW_KEY_MARK,
+       FLOW_KEY_NFCT,
+       FLOW_KEY_NFCT_SRC,
+       FLOW_KEY_NFCT_DST,
+       FLOW_KEY_NFCT_PROTO_SRC,
+       FLOW_KEY_NFCT_PROTO_DST,
+       FLOW_KEY_RTCLASSID,
+       FLOW_KEY_SKUID,
+       FLOW_KEY_SKGID,
+       FLOW_KEY_VLAN_TAG,
+       FLOW_KEY_RXHASH,
+       __FLOW_KEY_MAX,
+};
+
+#define FLOW_KEY_MAX   (__FLOW_KEY_MAX - 1)
+
+enum {
+       FLOW_MODE_MAP,
+       FLOW_MODE_HASH,
+};
+
+enum {
+       TCA_FLOW_UNSPEC,
+       TCA_FLOW_KEYS,
+       TCA_FLOW_MODE,
+       TCA_FLOW_BASECLASS,
+       TCA_FLOW_RSHIFT,
+       TCA_FLOW_ADDEND,
+       TCA_FLOW_MASK,
+       TCA_FLOW_XOR,
+       TCA_FLOW_DIVISOR,
+       TCA_FLOW_ACT,
+       TCA_FLOW_POLICE,
+       TCA_FLOW_EMATCHES,
+       TCA_FLOW_PERTURB,
+       __TCA_FLOW_MAX
+};
+
+#define TCA_FLOW_MAX   (__TCA_FLOW_MAX - 1)
+
+/* Basic filter */
+
+enum {
+       TCA_BASIC_UNSPEC,
+       TCA_BASIC_CLASSID,
+       TCA_BASIC_EMATCHES,
+       TCA_BASIC_ACT,
+       TCA_BASIC_POLICE,
+       __TCA_BASIC_MAX
+};
+
+#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1)
+
+
+/* Cgroup classifier */
+
+enum {
+       TCA_CGROUP_UNSPEC,
+       TCA_CGROUP_ACT,
+       TCA_CGROUP_POLICE,
+       TCA_CGROUP_EMATCHES,
+       __TCA_CGROUP_MAX,
+};
+
+#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1)
+
+/* BPF classifier */
+
+#define TCA_BPF_FLAG_ACT_DIRECT                (1 << 0)
+
+enum {
+       TCA_BPF_UNSPEC,
+       TCA_BPF_ACT,
+       TCA_BPF_POLICE,
+       TCA_BPF_CLASSID,
+       TCA_BPF_OPS_LEN,
+       TCA_BPF_OPS,
+       TCA_BPF_FD,
+       TCA_BPF_NAME,
+       TCA_BPF_FLAGS,
+       TCA_BPF_FLAGS_GEN,
+       TCA_BPF_TAG,
+       TCA_BPF_ID,
+       __TCA_BPF_MAX,
+};
+
+#define TCA_BPF_MAX (__TCA_BPF_MAX - 1)
+
+/* Flower classifier */
+
+enum {
+       TCA_FLOWER_UNSPEC,
+       TCA_FLOWER_CLASSID,
+       TCA_FLOWER_INDEV,
+       TCA_FLOWER_ACT,
+       TCA_FLOWER_KEY_ETH_DST,         /* ETH_ALEN */
+       TCA_FLOWER_KEY_ETH_DST_MASK,    /* ETH_ALEN */
+       TCA_FLOWER_KEY_ETH_SRC,         /* ETH_ALEN */
+       TCA_FLOWER_KEY_ETH_SRC_MASK,    /* ETH_ALEN */
+       TCA_FLOWER_KEY_ETH_TYPE,        /* be16 */
+       TCA_FLOWER_KEY_IP_PROTO,        /* u8 */
+       TCA_FLOWER_KEY_IPV4_SRC,        /* be32 */
+       TCA_FLOWER_KEY_IPV4_SRC_MASK,   /* be32 */
+       TCA_FLOWER_KEY_IPV4_DST,        /* be32 */
+       TCA_FLOWER_KEY_IPV4_DST_MASK,   /* be32 */
+       TCA_FLOWER_KEY_IPV6_SRC,        /* struct in6_addr */
+       TCA_FLOWER_KEY_IPV6_SRC_MASK,   /* struct in6_addr */
+       TCA_FLOWER_KEY_IPV6_DST,        /* struct in6_addr */
+       TCA_FLOWER_KEY_IPV6_DST_MASK,   /* struct in6_addr */
+       TCA_FLOWER_KEY_TCP_SRC,         /* be16 */
+       TCA_FLOWER_KEY_TCP_DST,         /* be16 */
+       TCA_FLOWER_KEY_UDP_SRC,         /* be16 */
+       TCA_FLOWER_KEY_UDP_DST,         /* be16 */
+
+       TCA_FLOWER_FLAGS,
+       TCA_FLOWER_KEY_VLAN_ID,         /* be16 */
+       TCA_FLOWER_KEY_VLAN_PRIO,       /* u8   */
+       TCA_FLOWER_KEY_VLAN_ETH_TYPE,   /* be16 */
+
+       TCA_FLOWER_KEY_ENC_KEY_ID,      /* be32 */
+       TCA_FLOWER_KEY_ENC_IPV4_SRC,    /* be32 */
+       TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */
+       TCA_FLOWER_KEY_ENC_IPV4_DST,    /* be32 */
+       TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */
+       TCA_FLOWER_KEY_ENC_IPV6_SRC,    /* struct in6_addr */
+       TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */
+       TCA_FLOWER_KEY_ENC_IPV6_DST,    /* struct in6_addr */
+       TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */
+
+       TCA_FLOWER_KEY_TCP_SRC_MASK,    /* be16 */
+       TCA_FLOWER_KEY_TCP_DST_MASK,    /* be16 */
+       TCA_FLOWER_KEY_UDP_SRC_MASK,    /* be16 */
+       TCA_FLOWER_KEY_UDP_DST_MASK,    /* be16 */
+       TCA_FLOWER_KEY_SCTP_SRC_MASK,   /* be16 */
+       TCA_FLOWER_KEY_SCTP_DST_MASK,   /* be16 */
+
+       TCA_FLOWER_KEY_SCTP_SRC,        /* be16 */
+       TCA_FLOWER_KEY_SCTP_DST,        /* be16 */
+
+       TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,        /* be16 */
+       TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,   /* be16 */
+       TCA_FLOWER_KEY_ENC_UDP_DST_PORT,        /* be16 */
+       TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,   /* be16 */
+
+       TCA_FLOWER_KEY_FLAGS,           /* be32 */
+       TCA_FLOWER_KEY_FLAGS_MASK,      /* be32 */
+
+       TCA_FLOWER_KEY_ICMPV4_CODE,     /* u8 */
+       TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */
+       TCA_FLOWER_KEY_ICMPV4_TYPE,     /* u8 */
+       TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */
+       TCA_FLOWER_KEY_ICMPV6_CODE,     /* u8 */
+       TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */
+       TCA_FLOWER_KEY_ICMPV6_TYPE,     /* u8 */
+       TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */
+
+       TCA_FLOWER_KEY_ARP_SIP,         /* be32 */
+       TCA_FLOWER_KEY_ARP_SIP_MASK,    /* be32 */
+       TCA_FLOWER_KEY_ARP_TIP,         /* be32 */
+       TCA_FLOWER_KEY_ARP_TIP_MASK,    /* be32 */
+       TCA_FLOWER_KEY_ARP_OP,          /* u8 */
+       TCA_FLOWER_KEY_ARP_OP_MASK,     /* u8 */
+       TCA_FLOWER_KEY_ARP_SHA,         /* ETH_ALEN */
+       TCA_FLOWER_KEY_ARP_SHA_MASK,    /* ETH_ALEN */
+       TCA_FLOWER_KEY_ARP_THA,         /* ETH_ALEN */
+       TCA_FLOWER_KEY_ARP_THA_MASK,    /* ETH_ALEN */
+
+       TCA_FLOWER_KEY_MPLS_TTL,        /* u8 - 8 bits */
+       TCA_FLOWER_KEY_MPLS_BOS,        /* u8 - 1 bit */
+       TCA_FLOWER_KEY_MPLS_TC,         /* u8 - 3 bits */
+       TCA_FLOWER_KEY_MPLS_LABEL,      /* be32 - 20 bits */
+
+       TCA_FLOWER_KEY_TCP_FLAGS,       /* be16 */
+       TCA_FLOWER_KEY_TCP_FLAGS_MASK,  /* be16 */
+
+       TCA_FLOWER_KEY_IP_TOS,          /* u8 */
+       TCA_FLOWER_KEY_IP_TOS_MASK,     /* u8 */
+       TCA_FLOWER_KEY_IP_TTL,          /* u8 */
+       TCA_FLOWER_KEY_IP_TTL_MASK,     /* u8 */
+
+       TCA_FLOWER_KEY_CVLAN_ID,        /* be16 */
+       TCA_FLOWER_KEY_CVLAN_PRIO,      /* u8   */
+       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,  /* be16 */
+
+       TCA_FLOWER_KEY_ENC_IP_TOS,      /* u8 */
+       TCA_FLOWER_KEY_ENC_IP_TOS_MASK, /* u8 */
+       TCA_FLOWER_KEY_ENC_IP_TTL,      /* u8 */
+       TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */
+
+       TCA_FLOWER_KEY_ENC_OPTS,
+       TCA_FLOWER_KEY_ENC_OPTS_MASK,
+
+       TCA_FLOWER_IN_HW_COUNT,
+
+       __TCA_FLOWER_MAX,
+};
+
+#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
+
+enum {
+       TCA_FLOWER_KEY_ENC_OPTS_UNSPEC,
+       TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested
+                                        * TCA_FLOWER_KEY_ENC_OPT_GENEVE_
+                                        * attributes
+                                        */
+       __TCA_FLOWER_KEY_ENC_OPTS_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1)
+
+enum {
+       TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC,
+       TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,            /* u16 */
+       TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,             /* u8 */
+       TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,             /* 4 to 128 bytes */
+
+       __TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \
+               (__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1)
+
+enum {
+       TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
+       TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
+};
+
+/* Match-all classifier */
+
+enum {
+       TCA_MATCHALL_UNSPEC,
+       TCA_MATCHALL_CLASSID,
+       TCA_MATCHALL_ACT,
+       TCA_MATCHALL_FLAGS,
+       __TCA_MATCHALL_MAX,
+};
+
+#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1)
+
+/* Extended Matches */
+
+struct tcf_ematch_tree_hdr {
+       __u16           nmatches;
+       __u16           progid;
+};
+
+enum {
+       TCA_EMATCH_TREE_UNSPEC,
+       TCA_EMATCH_TREE_HDR,
+       TCA_EMATCH_TREE_LIST,
+       __TCA_EMATCH_TREE_MAX
+};
+#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1)
+
+struct tcf_ematch_hdr {
+       __u16           matchid;
+       __u16           kind;
+       __u16           flags;
+       __u16           pad; /* currently unused */
+};
+
+/*  0                   1
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 
+ * +-----------------------+-+-+---+
+ * |         Unused        |S|I| R |
+ * +-----------------------+-+-+---+
+ *
+ * R(2) ::= relation to next ematch
+ *          where: 0 0 END (last ematch)
+ *                 0 1 AND
+ *                 1 0 OR
+ *                 1 1 Unused (invalid)
+ * I(1) ::= invert result
+ * S(1) ::= simple payload
+ */
+#define TCF_EM_REL_END 0
+#define TCF_EM_REL_AND (1<<0)
+#define TCF_EM_REL_OR  (1<<1)
+#define TCF_EM_INVERT  (1<<2)
+#define TCF_EM_SIMPLE  (1<<3)
+
+#define TCF_EM_REL_MASK        3
+#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK)
+
+enum {
+       TCF_LAYER_LINK,
+       TCF_LAYER_NETWORK,
+       TCF_LAYER_TRANSPORT,
+       __TCF_LAYER_MAX
+};
+#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1)
+
+/* Ematch type assignments
+ *   1..32767          Reserved for ematches inside kernel tree
+ *   32768..65535      Free to use, not reliable
+ */
+#define        TCF_EM_CONTAINER        0
+#define        TCF_EM_CMP              1
+#define        TCF_EM_NBYTE            2
+#define        TCF_EM_U32              3
+#define        TCF_EM_META             4
+#define        TCF_EM_TEXT             5
+#define        TCF_EM_VLAN             6
+#define        TCF_EM_CANID            7
+#define        TCF_EM_IPSET            8
+#define        TCF_EM_IPT              9
+#define        TCF_EM_MAX              9
+
+enum {
+       TCF_EM_PROG_TC
+};
+
+enum {
+       TCF_EM_OPND_EQ,
+       TCF_EM_OPND_GT,
+       TCF_EM_OPND_LT
+};
+
+#endif
diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h
new file mode 100644 (file)
index 0000000..6e89a5d
--- /dev/null
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_TC_BPF_H
+#define __LINUX_TC_BPF_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_BPF 13
+
+struct tc_act_bpf {
+       tc_gen;
+};
+
+enum {
+       TCA_ACT_BPF_UNSPEC,
+       TCA_ACT_BPF_TM,
+       TCA_ACT_BPF_PARMS,
+       TCA_ACT_BPF_OPS_LEN,
+       TCA_ACT_BPF_OPS,
+       TCA_ACT_BPF_FD,
+       TCA_ACT_BPF_NAME,
+       TCA_ACT_BPF_PAD,
+       TCA_ACT_BPF_TAG,
+       TCA_ACT_BPF_ID,
+       __TCA_ACT_BPF_MAX,
+};
+#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
+
+#endif
index f1fe492c8e17d060d70009f5ff139cbf975c6f72..f0017c831e57bdf48caf0e4a334db009ad23f0bb 100644 (file)
@@ -24,6 +24,7 @@ TARGETS += memory-hotplug
 TARGETS += mount
 TARGETS += mqueue
 TARGETS += net
+TARGETS += netfilter
 TARGETS += nsfs
 TARGETS += powerpc
 TARGETS += proc
index 7887df6933998c62d6d81926066918728d704c8c..44ed7f29f8ab6cec40c83c02c01d28ca36c36492 100644 (file)
@@ -81,7 +81,10 @@ int main(int argc, char **argv)
                goto err;
        }
 
-       assert(system("ping localhost -6 -c 10000 -f -q > /dev/null") == 0);
+       if (system("command -v ping6 > /dev/null 2>&1") == 0)
+               assert(!system("ping6 localhost -c 10000 -f -q > /dev/null"));
+       else
+               assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null"));
 
        if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
                           &prog_cnt)) {
index 6f61df62f690c69adceb2c328b231ab39b8b9c49..550b7e46bf4a4143059cc7ea58643adf11251353 100644 (file)
@@ -13896,6 +13896,25 @@ static struct bpf_test tests[] = {
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
                .result = ACCEPT,
        },
+       {
+               "calls: ctx read at start of subprog",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+                       BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+                       BPF_EXIT_INSN(),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+               .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+               .result_unpriv = REJECT,
+               .result = ACCEPT,
+       },
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
new file mode 100644 (file)
index 0000000..47ed6ce
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for netfilter selftests
+
+TEST_PROGS := nft_trans_stress.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
new file mode 100644 (file)
index 0000000..1017313
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_NET_NS=y
+CONFIG_NF_TABLES_INET=y
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
new file mode 100755 (executable)
index 0000000..f1affd1
--- /dev/null
@@ -0,0 +1,85 @@
+#!/bin/bash
+#
+# This test is for stress-testing the nf_tables config plane path vs.
+# packet path processing: Make sure we never release rules that are
+# still visible to other cpus.
+#
+# set -e
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+testns=testns1
+tables="foo bar baz quux"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+tmp=$(mktemp)
+
+for table in $tables; do
+	echo add table inet "$table" >> "$tmp"
+	echo flush table inet "$table" >> "$tmp"
+
+	echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp"
+	echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp"
+	for c in $(seq 1 400); do
+		chain=$(printf "chain%03u" "$c")
+		echo "add chain inet $table $chain" >> "$tmp"
+	done
+
+	for c in $(seq 1 400); do
+		chain=$(printf "chain%03u" "$c")
+		for BASE in INPUT OUTPUT; do
+			echo "add rule inet $table $BASE counter jump $chain" >> "$tmp"
+		done
+		echo "add rule inet $table $chain counter return" >> "$tmp"
+	done
+done
+
+ip netns add "$testns"
+ip -netns "$testns" link set lo up
+
+# Prefer nproc; parsing lscpu's human-readable output is locale-sensitive.
+cpunum=$(nproc 2>/dev/null)
+if [ -z "$cpunum" ]; then
+	cpunum=$(lscpu | grep "^CPU(s):" | awk '{print $2}')
+fi
+
+# Pin one IPv4 and one IPv6 flood ping to each cpu inside the netns.
+for i in $(seq 0 $((cpunum-1)));do
+	mask=$(printf 0x%x $((1<<$i)))
+	ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null &
+	ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null &
+done
+
+sleep 1
+
+for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done
+
+for table in $tables;do
+	randsleep=$((RANDOM%10))
+	sleep $randsleep
+	ip netns exec "$testns" nft delete table inet $table 2>/dev/null
+done
+
+randsleep=$((RANDOM%10))
+sleep $randsleep
+
+# Kill only the pingers started inside the test netns; a bare
+# "pkill -9 ping" would also kill unrelated pings on the host.
+ip netns pids "$testns" | xargs -r kill -9 2>/dev/null
+
+wait
+
+rm -f "$tmp"
+ip netns del "$testns"