Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
author David S. Miller <davem@davemloft.net>
Tue, 9 Oct 2018 04:28:55 +0000 (21:28 -0700)
committer David S. Miller <davem@davemloft.net>
Tue, 9 Oct 2018 04:28:55 +0000 (21:28 -0700)
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for your net-next tree:

1) Support for matching on ipsec policy already set in the route, from
   Florian Westphal.

2) Split set destruction into deactivate and destroy phase to make it
   fit better into the transaction infrastructure, also from Florian.
   This includes a patch to warn on imbalance when setting the new
   activate and deactivate interfaces.

3) Release transaction list from the workqueue to remove expensive
   synchronize_rcu() from configuration plane path. This speeds up
   configuration plane quite a bit. From Florian Westphal.

4) Add new xfrm/ipsec extension. This new extension allows you to match
   on ipsec tunnel keys such as source and destination address, spi and
   reqid. From Máté Eckl and Florian Westphal.

5) Add secmark support, this includes connsecmark too, patches
   from Christian Gottsche.

6) Allow to specify remaining bytes in xt_quota, from Chenbo Feng.
   One follow up patch to calm a clang warning for this one, from
   Nathan Chancellor.

7) Flush conntrack entries based on layer 3 family, from Kristian Evensen.

8) New revision for cgroups2 to shrink the path field.

9) Get rid of obsolete need_conntrack(), as a result of recent
   demodularization work.

10) Use WARN_ON instead of BUG_ON, from Florian Westphal.

11) Remove unused symbol export of nf_nat_ipv4_fn(), from Florian.

12) Remove superfluous check for timeout netlink parser and dump
    functions in layer 4 conntrack helpers.

13) Remove unnecessary rcu read side locks in NAT redirect,
    from Taehee Yoo.

14) Pass nf_hook_state structure to error handlers, patch from
    Florian Westphal.

15) Remove ->new() interface from layer 4 protocol trackers. Place
    them in the ->packet() interface. From Florian.

16) Place conntrack ->error() handling in the ->packet() interface.
    Patches from Florian Westphal.

17) Remove unused parameter in the pernet initialization path,
    also from Florian.

18) Remove additional parameter to specify layer 3 protocol when
    looking up the protocol tracker. From Florian.

19) Shrink array of layer 4 protocol trackers, from Florian.

20) Check for linear skb only once in the ALG NAT mangling
    codebase, from Taehee Yoo.

21) Use rhashtable_walk_enter() instead of deprecated
    rhashtable_walk_init(), also from Taehee.

22) No need to flush all conntracks when only one single address
    is gone, from Tan Hu.

23) Remove redundant check for NAT flags in flowtable code, from
    Taehee Yoo.

24) Use rhashtable_lookup() instead of rhashtable_lookup_fast()
    in the netfilter codebase, since the rcu read side lock is already
    assumed in this path.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
53 files changed:
include/linux/netfilter/nf_conntrack_common.h
include/net/netfilter/ipv4/nf_conntrack_ipv4.h
include/net/netfilter/ipv6/nf_conntrack_ipv6.h
include/net/netfilter/nf_conntrack_core.h
include/net/netfilter/nf_conntrack_l4proto.h
include/net/netfilter/nf_tables.h
include/net/netfilter/nf_tables_core.h
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/netfilter/xt_cgroup.h
include/uapi/linux/netfilter/xt_quota.h
net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
net/ipv6/netfilter/ip6t_ipv6header.c
net/ipv6/netfilter/ip6t_rt.c
net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_expect.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto.c
net/netfilter/nf_conntrack_proto_dccp.c
net/netfilter/nf_conntrack_proto_generic.c
net/netfilter/nf_conntrack_proto_gre.c
net/netfilter/nf_conntrack_proto_icmp.c
net/netfilter/nf_conntrack_proto_icmpv6.c
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_conntrack_proto_udp.c
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_flow_table_ip.c
net/netfilter/nf_nat_helper.c
net/netfilter/nf_nat_redirect.c
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_core.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nft_cmp.c
net/netfilter/nft_ct.c
net/netfilter/nft_dynset.c
net/netfilter/nft_lookup.c
net/netfilter/nft_meta.c
net/netfilter/nft_objref.c
net/netfilter/nft_reject.c
net/netfilter/nft_rt.c
net/netfilter/nft_set_hash.c
net/netfilter/nft_xfrm.c [new file with mode: 0644]
net/netfilter/xt_CT.c
net/netfilter/xt_IDLETIMER.c
net/netfilter/xt_SECMARK.c
net/netfilter/xt_cgroup.c
net/netfilter/xt_quota.c
net/openvswitch/conntrack.c

index 03097fa70975434ed686d99b4cda8171749656e5..e142b2b5f1ea6715f0280dd0d9ded623322efee5 100644 (file)
@@ -19,7 +19,4 @@ struct ip_conntrack_stat {
        unsigned int search_restart;
 };
 
-/* call to create an explicit dependency on nf_conntrack. */
-void need_conntrack(void);
-
 #endif /* _NF_CONNTRACK_COMMON_H */
index c84b51682f08c68b7dbddcdb9f8b82b2465cefa6..135ee702c7b0373cb22deca14b87e1e7e303b5a9 100644 (file)
 #ifndef _NF_CONNTRACK_IPV4_H
 #define _NF_CONNTRACK_IPV4_H
 
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
 #ifdef CONFIG_NF_CT_PROTO_DCCP
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp;
 #endif
 #ifdef CONFIG_NF_CT_PROTO_SCTP
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp;
 #endif
 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite;
 #endif
 
-int nf_conntrack_ipv4_compat_init(void);
-void nf_conntrack_ipv4_compat_fini(void);
-
 #endif /*_NF_CONNTRACK_IPV4_H*/
index effa8dfba68ce9648001ce518eb4940d3a8d4c37..7b3c873f883966386ce6f1f2b5cd60422090ecab 100644 (file)
@@ -2,20 +2,7 @@
 #ifndef _NF_CONNTRACK_IPV6_H
 #define _NF_CONNTRACK_IPV6_H
 
-extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
-
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
-#ifdef CONFIG_NF_CT_PROTO_DCCP
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6;
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6;
-#endif
 
 #include <linux/sysctl.h>
 extern struct ctl_table nf_ct_ipv6_sysctl_table[];
index 2a3e0974a6af4029ecba39cf4bb41d2c46d77282..afc9b3620473e96dc2807b0891c8fdce5e46fa25 100644 (file)
@@ -20,8 +20,7 @@
 /* This header is used to share core functionality between the
    standalone connection tracking module, and the compatibility layer's use
    of connection tracking. */
-unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
-                            struct sk_buff *skb);
+unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state);
 
 int nf_conntrack_init_net(struct net *net);
 void nf_conntrack_cleanup_net(struct net *net);
index 8465263b297d70921678fa4c9bf3281c18aaa4ff..eed04af9b75e56b6c33d0887cdefa4c8f827251e 100644 (file)
@@ -18,9 +18,6 @@
 struct seq_file;
 
 struct nf_conntrack_l4proto {
-       /* L3 Protocol number. */
-       u_int16_t l3proto;
-
        /* L4 Protocol number. */
        u_int8_t l4proto;
 
@@ -43,22 +40,14 @@ struct nf_conntrack_l4proto {
 
        /* Returns verdict for packet, or -1 for invalid. */
        int (*packet)(struct nf_conn *ct,
-                     const struct sk_buff *skb,
+                     struct sk_buff *skb,
                      unsigned int dataoff,
-                     enum ip_conntrack_info ctinfo);
-
-       /* Called when a new connection for this protocol found;
-        * returns TRUE if it's OK.  If so, packet() called next. */
-       bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
-                   unsigned int dataoff);
+                     enum ip_conntrack_info ctinfo,
+                     const struct nf_hook_state *state);
 
        /* Called when a conntrack entry is destroyed */
        void (*destroy)(struct nf_conn *ct);
 
-       int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
-                    unsigned int dataoff,
-                    u_int8_t pf, unsigned int hooknum);
-
        /* called by gc worker if table is full */
        bool (*can_early_drop)(const struct nf_conn *ct);
 
@@ -92,7 +81,7 @@ struct nf_conntrack_l4proto {
 #endif
        unsigned int    *net_id;
        /* Init l4proto pernet data */
-       int (*init_net)(struct net *net, u_int16_t proto);
+       int (*init_net)(struct net *net);
 
        /* Return the per-net protocol part. */
        struct nf_proto_net *(*get_net_proto)(struct net *net);
@@ -101,16 +90,23 @@ struct nf_conntrack_l4proto {
        struct module *me;
 };
 
+int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
+                             struct sk_buff *skb,
+                             unsigned int dataoff,
+                             const struct nf_hook_state *state);
+
+int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
+                             struct sk_buff *skb,
+                             unsigned int dataoff,
+                             const struct nf_hook_state *state);
 /* Existing built-in generic protocol */
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
 
-#define MAX_NF_CT_PROTO 256
+#define MAX_NF_CT_PROTO IPPROTO_UDPLITE
 
-const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
-                                                 u_int8_t l4proto);
+const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto);
 
-const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
-                                                   u_int8_t l4proto);
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto);
 void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p);
 
 /* Protocol pernet registration. */
index 0f39ac487012c3075a3154c65b22504a15063dcd..841835a387e17849155ae85f63b3197e747f3469 100644 (file)
@@ -470,6 +470,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
                       struct nft_set_binding *binding);
 void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
                          struct nft_set_binding *binding);
+void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set,
+                         struct nft_set_binding *binding);
+void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
 
 /**
  *     enum nft_set_extensions - set extension type IDs
@@ -724,7 +727,9 @@ struct nft_expr_type {
  *     @eval: Expression evaluation function
  *     @size: full expression size, including private data size
  *     @init: initialization function
- *     @destroy: destruction function
+ *     @activate: activate expression in the next generation
+ *     @deactivate: deactivate expression in next generation
+ *     @destroy: destruction function, called after synchronize_rcu
  *     @dump: function to dump parameters
  *     @type: expression type
  *     @validate: validate expression, called during loop detection
@@ -1293,12 +1298,14 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
  *
  *     @list: used internally
  *     @msg_type: message type
+ *     @put_net: ctx->net needs to be put
  *     @ctx: transaction context
  *     @data: internal information related to the transaction
  */
 struct nft_trans {
        struct list_head                list;
        int                             msg_type;
+       bool                            put_net;
        struct nft_ctx                  ctx;
        char                            data[0];
 };
index 8da837d2aaf997d5d642b081154c1bf9d2b3b26b..2046d104f323645341e4c8bdca369b2589120811 100644 (file)
@@ -16,6 +16,10 @@ extern struct nft_expr_type nft_meta_type;
 extern struct nft_expr_type nft_rt_type;
 extern struct nft_expr_type nft_exthdr_type;
 
+#ifdef CONFIG_NETWORK_SECMARK
+extern struct nft_object_type nft_secmark_obj_type;
+#endif
+
 int nf_tables_core_module_init(void);
 void nf_tables_core_module_exit(void);
 
index e23290ffdc77d7d44395e22614264988a5381f69..5444e76870bbc97696c55f8caed791b6e56d1f8c 100644 (file)
@@ -826,12 +826,14 @@ enum nft_meta_keys {
  * @NFT_RT_NEXTHOP4: routing nexthop for IPv4
  * @NFT_RT_NEXTHOP6: routing nexthop for IPv6
  * @NFT_RT_TCPMSS: fetch current path tcp mss
+ * @NFT_RT_XFRM: boolean, skb->dst->xfrm != NULL
  */
 enum nft_rt_keys {
        NFT_RT_CLASSID,
        NFT_RT_NEXTHOP4,
        NFT_RT_NEXTHOP6,
        NFT_RT_TCPMSS,
+       NFT_RT_XFRM,
        __NFT_RT_MAX
 };
 #define NFT_RT_MAX             (__NFT_RT_MAX - 1)
@@ -1174,6 +1176,21 @@ enum nft_quota_attributes {
 };
 #define NFTA_QUOTA_MAX         (__NFTA_QUOTA_MAX - 1)
 
+/**
+ * enum nft_secmark_attributes - nf_tables secmark object netlink attributes
+ *
+ * @NFTA_SECMARK_CTX: security context (NLA_STRING)
+ */
+enum nft_secmark_attributes {
+       NFTA_SECMARK_UNSPEC,
+       NFTA_SECMARK_CTX,
+       __NFTA_SECMARK_MAX,
+};
+#define NFTA_SECMARK_MAX       (__NFTA_SECMARK_MAX - 1)
+
+/* Max security context length */
+#define NFT_SECMARK_CTX_MAXLEN         256
+
 /**
  * enum nft_reject_types - nf_tables reject expression reject types
  *
@@ -1430,7 +1447,8 @@ enum nft_ct_timeout_timeout_attributes {
 #define NFT_OBJECT_CONNLIMIT   5
 #define NFT_OBJECT_TUNNEL      6
 #define NFT_OBJECT_CT_TIMEOUT  7
-#define __NFT_OBJECT_MAX       8
+#define NFT_OBJECT_SECMARK     8
+#define __NFT_OBJECT_MAX       9
 #define NFT_OBJECT_MAX         (__NFT_OBJECT_MAX - 1)
 
 /**
@@ -1512,6 +1530,35 @@ enum nft_devices_attributes {
 };
 #define NFTA_DEVICE_MAX                (__NFTA_DEVICE_MAX - 1)
 
+/*
+ * enum nft_xfrm_attributes - nf_tables xfrm expr netlink attributes
+ *
+ * @NFTA_XFRM_DREG: destination register (NLA_U32)
+ * @NFTA_XFRM_KEY: enum nft_xfrm_keys (NLA_U32)
+ * @NFTA_XFRM_DIR: direction (NLA_U8)
+ * @NFTA_XFRM_SPNUM: index in secpath array (NLA_U32)
+ */
+enum nft_xfrm_attributes {
+       NFTA_XFRM_UNSPEC,
+       NFTA_XFRM_DREG,
+       NFTA_XFRM_KEY,
+       NFTA_XFRM_DIR,
+       NFTA_XFRM_SPNUM,
+       __NFTA_XFRM_MAX
+};
+#define NFTA_XFRM_MAX (__NFTA_XFRM_MAX - 1)
+
+enum nft_xfrm_keys {
+       NFT_XFRM_KEY_UNSPEC,
+       NFT_XFRM_KEY_DADDR_IP4,
+       NFT_XFRM_KEY_DADDR_IP6,
+       NFT_XFRM_KEY_SADDR_IP4,
+       NFT_XFRM_KEY_SADDR_IP6,
+       NFT_XFRM_KEY_REQID,
+       NFT_XFRM_KEY_SPI,
+       __NFT_XFRM_KEY_MAX,
+};
+#define NFT_XFRM_KEY_MAX (__NFT_XFRM_KEY_MAX - 1)
 
 /**
  * enum nft_trace_attributes - nf_tables trace netlink attributes
index e96dfa1b34f7ff8a118105b936b2e1324d7ebe94..b74e370d613346b1669082313ef6de4b6ed3ed4b 100644 (file)
@@ -22,4 +22,20 @@ struct xt_cgroup_info_v1 {
        void            *priv __attribute__((aligned(8)));
 };
 
+#define XT_CGROUP_PATH_MAX     512
+
+struct xt_cgroup_info_v2 {
+       __u8            has_path;
+       __u8            has_classid;
+       __u8            invert_path;
+       __u8            invert_classid;
+       union {
+               char    path[XT_CGROUP_PATH_MAX];
+               __u32   classid;
+       };
+
+       /* kernel internal data */
+       void            *priv __attribute__((aligned(8)));
+};
+
 #endif /* _UAPI_XT_CGROUP_H */
index f3ba5d9e58b6da4120db1c87877937315e68bbbb..d72fd52adbba62ab280c5bb79741681666ac3d11 100644 (file)
@@ -15,9 +15,11 @@ struct xt_quota_info {
        __u32 flags;
        __u32 pad;
        __aligned_u64 quota;
-
-       /* Used internally by the kernel */
-       struct xt_quota_priv    *master;
+#ifdef __KERNEL__
+       atomic64_t counter;
+#else
+       __aligned_u64 remain;
+#endif
 };
 
 #endif /* _XT_QUOTA_H */
index 6115bf1ff6f0a16f5114a095646808ab2ef63405..78a67f961d86dafe09c2b9b4ccff1709a88261e4 100644 (file)
@@ -264,7 +264,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
 
        return nf_nat_inet_fn(priv, skb, state);
 }
-EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn);
 
 static unsigned int
 nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
index ad3aeff152ede37e5d39b0e5bcf05a0ad5c0904e..a9d5e013e5556a5bace7afcb61cabeb0849261d1 100644 (file)
@@ -104,12 +104,26 @@ static int masq_device_event(struct notifier_block *this,
        return NOTIFY_DONE;
 }
 
+static int inet_cmp(struct nf_conn *ct, void *ptr)
+{
+       struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+       struct net_device *dev = ifa->ifa_dev->dev;
+       struct nf_conntrack_tuple *tuple;
+
+       if (!device_cmp(ct, (void *)(long)dev->ifindex))
+               return 0;
+
+       tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+       return ifa->ifa_address == tuple->dst.u3.ip;
+}
+
 static int masq_inet_event(struct notifier_block *this,
                           unsigned long event,
                           void *ptr)
 {
        struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
-       struct netdev_notifier_info info;
+       struct net *net = dev_net(idev->dev);
 
        /* The masq_dev_notifier will catch the case of the device going
         * down.  So if the inetdev is dead and being destroyed we have
@@ -119,8 +133,10 @@ static int masq_inet_event(struct notifier_block *this,
        if (idev->dead)
                return NOTIFY_DONE;
 
-       netdev_notifier_info_init(&info, idev->dev);
-       return masq_device_event(this, event, &info);
+       if (event == NETDEV_DOWN)
+               nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
+
+       return NOTIFY_DONE;
 }
 
 static struct notifier_block masq_dev_notifier = {
index 8b147440fbdced8dbc23023785596f0565b6ddef..af737b47b9b56d28ce02c93ecd2870084cc4c265 100644 (file)
@@ -65,7 +65,10 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
                }
 
                hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-               BUG_ON(hp == NULL);
+               if (!hp) {
+                       par->hotdrop = true;
+                       return false;
+               }
 
                /* Calculate the header length */
                if (nexthdr == NEXTHDR_FRAGMENT)
index 2c99b94eeca3221bbc0887e82d1afe4e53e9efce..21bf6bf043232ac3fef34e5dba142dfad8e31b77 100644 (file)
@@ -137,7 +137,10 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
                                                        sizeof(_addr),
                                                        &_addr);
 
-                               BUG_ON(ap == NULL);
+                               if (ap == NULL) {
+                                       par->hotdrop = true;
+                                       return false;
+                               }
 
                                if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
                                        pr_debug("i=%d temp=%d;\n", i, temp);
@@ -166,7 +169,10 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
                                                        + temp * sizeof(_addr),
                                                        sizeof(_addr),
                                                        &_addr);
-                               BUG_ON(ap == NULL);
+                               if (ap == NULL) {
+                                       par->hotdrop = true;
+                                       return false;
+                               }
 
                                if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
                                        break;
index e6eb7cf9b54fd5e5c81b14836b0d629d5cccfd6d..3e4bf2286abea96617f8df1ecac74d91667ef59f 100644 (file)
@@ -87,18 +87,30 @@ static struct notifier_block masq_dev_notifier = {
 struct masq_dev_work {
        struct work_struct work;
        struct net *net;
+       struct in6_addr addr;
        int ifindex;
 };
 
+static int inet_cmp(struct nf_conn *ct, void *work)
+{
+       struct masq_dev_work *w = (struct masq_dev_work *)work;
+       struct nf_conntrack_tuple *tuple;
+
+       if (!device_cmp(ct, (void *)(long)w->ifindex))
+               return 0;
+
+       tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+       return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
+}
+
 static void iterate_cleanup_work(struct work_struct *work)
 {
        struct masq_dev_work *w;
-       long index;
 
        w = container_of(work, struct masq_dev_work, work);
 
-       index = w->ifindex;
-       nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0);
+       nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0);
 
        put_net(w->net);
        kfree(w);
@@ -147,6 +159,7 @@ static int masq_inet_event(struct notifier_block *this,
                INIT_WORK(&w->work, iterate_cleanup_work);
                w->ifindex = dev->ifindex;
                w->net = net;
+               w->addr = ifa->addr;
                schedule_work(&w->work);
 
                return NOTIFY_DONE;
index f61c306de1d089358ede089f87f97c72c8927670..2ab870ef233a83bda49710a1cb8feddbcb13c010 100644 (file)
@@ -625,6 +625,13 @@ config NFT_FIB_INET
          The lookup will be delegated to the IPv4 or IPv6 FIB depending
          on the protocol of the packet.
 
+config NFT_XFRM
+       tristate "Netfilter nf_tables xfrm/IPSec security association matching"
+       depends on XFRM
+       help
+         This option adds an expression that you can use to extract properties
+         of a packets security association.
+
 config NFT_SOCKET
        tristate "Netfilter nf_tables socket match support"
        depends on IPV6 || IPV6=n
index 16895e045b66b97929937a5c96d3c83a828bdb18..4ddf3ef51ecef1262fe760934f5bb6ccca23d5ba 100644 (file)
@@ -113,6 +113,7 @@ obj-$(CONFIG_NFT_FIB_NETDEV)        += nft_fib_netdev.o
 obj-$(CONFIG_NFT_SOCKET)       += nft_socket.o
 obj-$(CONFIG_NFT_OSF)          += nft_osf.o
 obj-$(CONFIG_NFT_TPROXY)       += nft_tproxy.o
+obj-$(CONFIG_NFT_XFRM)         += nft_xfrm.o
 
 # nf_tables netdev
 obj-$(CONFIG_NFT_DUP_NETDEV)   += nft_dup_netdev.o
index a676d5f76bdc26b8a3f192a82a0cbad2e1bc866e..ca1168d67fac6c0fc1eaef5dfeb1db8428e51db3 100644 (file)
@@ -379,7 +379,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
                return false;
        }
 
-       l4proto = __nf_ct_l4proto_find(l3num, protonum);
+       l4proto = __nf_ct_l4proto_find(protonum);
 
        ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
                              l4proto);
@@ -539,7 +539,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
                nf_ct_tmpl_free(ct);
                return;
        }
-       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+       l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
        if (l4proto->destroy)
                l4proto->destroy(ct);
 
@@ -840,7 +840,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
        enum ip_conntrack_info oldinfo;
        struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
 
-       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+       l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
        if (l4proto->allow_clash &&
            !nf_ct_is_dying(ct) &&
            atomic_inc_not_zero(&ct->ct_general.use)) {
@@ -1109,7 +1109,7 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
        if (!test_bit(IPS_ASSURED_BIT, &ct->status))
                return true;
 
-       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+       l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
        if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
                return true;
 
@@ -1370,12 +1370,6 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 
        timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
 
-       if (!l4proto->new(ct, skb, dataoff)) {
-               nf_conntrack_free(ct);
-               pr_debug("can't track with proto module\n");
-               return NULL;
-       }
-
        if (timeout_ext)
                nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
                                      GFP_ATOMIC);
@@ -1436,12 +1430,12 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 
 /* On success, returns 0, sets skb->_nfct | ctinfo */
 static int
-resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
+resolve_normal_ct(struct nf_conn *tmpl,
                  struct sk_buff *skb,
                  unsigned int dataoff,
-                 u_int16_t l3num,
                  u_int8_t protonum,
-                 const struct nf_conntrack_l4proto *l4proto)
+                 const struct nf_conntrack_l4proto *l4proto,
+                 const struct nf_hook_state *state)
 {
        const struct nf_conntrack_zone *zone;
        struct nf_conntrack_tuple tuple;
@@ -1452,17 +1446,18 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
        u32 hash;
 
        if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
-                            dataoff, l3num, protonum, net, &tuple, l4proto)) {
+                            dataoff, state->pf, protonum, state->net,
+                            &tuple, l4proto)) {
                pr_debug("Can't get tuple\n");
                return 0;
        }
 
        /* look for tuple match */
        zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
-       hash = hash_conntrack_raw(&tuple, net);
-       h = __nf_conntrack_find_get(net, zone, &tuple, hash);
+       hash = hash_conntrack_raw(&tuple, state->net);
+       h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
        if (!h) {
-               h = init_conntrack(net, tmpl, &tuple, l4proto,
+               h = init_conntrack(state->net, tmpl, &tuple, l4proto,
                                   skb, dataoff, hash);
                if (!h)
                        return 0;
@@ -1491,13 +1486,45 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
        return 0;
 }
 
+/*
+ * icmp packets need special treatment to handle error messages that are
+ * related to a connection.
+ *
+ * Callers need to check if skb has a conntrack assigned when this
+ * helper returns; in such case skb belongs to an already known connection.
+ */
+static unsigned int __cold
+nf_conntrack_handle_icmp(struct nf_conn *tmpl,
+                        struct sk_buff *skb,
+                        unsigned int dataoff,
+                        u8 protonum,
+                        const struct nf_hook_state *state)
+{
+       int ret;
+
+       if (state->pf == NFPROTO_IPV4 && protonum == IPPROTO_ICMP)
+               ret = nf_conntrack_icmpv4_error(tmpl, skb, dataoff, state);
+#if IS_ENABLED(CONFIG_IPV6)
+       else if (state->pf == NFPROTO_IPV6 && protonum == IPPROTO_ICMPV6)
+               ret = nf_conntrack_icmpv6_error(tmpl, skb, dataoff, state);
+#endif
+       else
+               return NF_ACCEPT;
+
+       if (ret <= 0) {
+               NF_CT_STAT_INC_ATOMIC(state->net, error);
+               NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+       }
+
+       return ret;
+}
+
 unsigned int
-nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
-               struct sk_buff *skb)
+nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
 {
        const struct nf_conntrack_l4proto *l4proto;
-       struct nf_conn *ct, *tmpl;
        enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct, *tmpl;
        u_int8_t protonum;
        int dataoff, ret;
 
@@ -1506,32 +1533,28 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                /* Previously seen (loopback or untracked)?  Ignore. */
                if ((tmpl && !nf_ct_is_template(tmpl)) ||
                     ctinfo == IP_CT_UNTRACKED) {
-                       NF_CT_STAT_INC_ATOMIC(net, ignore);
+                       NF_CT_STAT_INC_ATOMIC(state->net, ignore);
                        return NF_ACCEPT;
                }
                skb->_nfct = 0;
        }
 
        /* rcu_read_lock()ed by nf_hook_thresh */
-       dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
+       dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum);
        if (dataoff <= 0) {
                pr_debug("not prepared to track yet or error occurred\n");
-               NF_CT_STAT_INC_ATOMIC(net, error);
-               NF_CT_STAT_INC_ATOMIC(net, invalid);
+               NF_CT_STAT_INC_ATOMIC(state->net, error);
+               NF_CT_STAT_INC_ATOMIC(state->net, invalid);
                ret = NF_ACCEPT;
                goto out;
        }
 
-       l4proto = __nf_ct_l4proto_find(pf, protonum);
+       l4proto = __nf_ct_l4proto_find(protonum);
 
-       /* It may be an special packet, error, unclean...
-        * inverse of the return code tells to the netfilter
-        * core what to do with the packet. */
-       if (l4proto->error != NULL) {
-               ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum);
+       if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) {
+               ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff,
+                                              protonum, state);
                if (ret <= 0) {
-                       NF_CT_STAT_INC_ATOMIC(net, error);
-                       NF_CT_STAT_INC_ATOMIC(net, invalid);
                        ret = -ret;
                        goto out;
                }
@@ -1540,10 +1563,11 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                        goto out;
        }
 repeat:
-       ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
+       ret = resolve_normal_ct(tmpl, skb, dataoff,
+                               protonum, l4proto, state);
        if (ret < 0) {
                /* Too stressed to deal. */
-               NF_CT_STAT_INC_ATOMIC(net, drop);
+               NF_CT_STAT_INC_ATOMIC(state->net, drop);
                ret = NF_DROP;
                goto out;
        }
@@ -1551,21 +1575,21 @@ repeat:
        ct = nf_ct_get(skb, &ctinfo);
        if (!ct) {
                /* Not valid part of a connection */
-               NF_CT_STAT_INC_ATOMIC(net, invalid);
+               NF_CT_STAT_INC_ATOMIC(state->net, invalid);
                ret = NF_ACCEPT;
                goto out;
        }
 
-       ret = l4proto->packet(ct, skb, dataoff, ctinfo);
+       ret = l4proto->packet(ct, skb, dataoff, ctinfo, state);
        if (ret <= 0) {
                /* Invalid: inverse of the return code tells
                 * the netfilter core what to do */
                pr_debug("nf_conntrack_in: Can't track with proto module\n");
                nf_conntrack_put(&ct->ct_general);
                skb->_nfct = 0;
-               NF_CT_STAT_INC_ATOMIC(net, invalid);
+               NF_CT_STAT_INC_ATOMIC(state->net, invalid);
                if (ret == -NF_DROP)
-                       NF_CT_STAT_INC_ATOMIC(net, drop);
+                       NF_CT_STAT_INC_ATOMIC(state->net, drop);
                /* Special case: TCP tracker reports an attempt to reopen a
                 * closed/aborted connection. We have to go back and create a
                 * fresh conntrack.
@@ -1594,8 +1618,7 @@ bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
 
        rcu_read_lock();
        ret = nf_ct_invert_tuple(inverse, orig,
-                                __nf_ct_l4proto_find(orig->src.l3num,
-                                                     orig->dst.protonum));
+                                __nf_ct_l4proto_find(orig->dst.protonum));
        rcu_read_unlock();
        return ret;
 }
@@ -1752,7 +1775,7 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
        if (dataoff <= 0)
                return -1;
 
-       l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+       l4proto = nf_ct_l4proto_find_get(l4num);
 
        if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
                             l4num, net, &tuple, l4proto))
index 27b84231db10178288ed00ecdfc53bfe5a9407f0..3034038bfdf0557cc66c8c4789800e3f2e6451b3 100644 (file)
@@ -610,8 +610,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
                   expect->tuple.src.l3num,
                   expect->tuple.dst.protonum);
        print_tuple(s, &expect->tuple,
-                   __nf_ct_l4proto_find(expect->tuple.src.l3num,
-                                      expect->tuple.dst.protonum));
+                   __nf_ct_l4proto_find(expect->tuple.dst.protonum));
 
        if (expect->flags & NF_CT_EXPECT_PERMANENT) {
                seq_puts(s, "PERMANENT");
index 036207ecaf1663e2ddeb8bf31a5a63e56e319302..4ae8e528943aca9f1881c3b4f77e5ebc231ebe14 100644 (file)
@@ -135,8 +135,7 @@ static int ctnetlink_dump_tuples(struct sk_buff *skb,
        ret = ctnetlink_dump_tuples_ip(skb, tuple);
 
        if (ret >= 0) {
-               l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
-                                              tuple->dst.protonum);
+               l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
                ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto);
        }
        rcu_read_unlock();
@@ -184,7 +183,7 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
        struct nlattr *nest_proto;
        int ret;
 
-       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+       l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
        if (!l4proto->to_nlattr)
                return 0;
 
@@ -592,7 +591,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
        len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
        len *= 3u; /* ORIG, REPLY, MASTER */
 
-       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+       l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
        len += l4proto->nlattr_size;
        if (l4proto->nlattr_tuple_size) {
                len4 = l4proto->nlattr_tuple_size();
@@ -821,6 +820,7 @@ static int ctnetlink_done(struct netlink_callback *cb)
 }
 
 struct ctnetlink_filter {
+       u8 family;
        struct {
                u_int32_t val;
                u_int32_t mask;
@@ -828,31 +828,39 @@ struct ctnetlink_filter {
 };
 
 static struct ctnetlink_filter *
-ctnetlink_alloc_filter(const struct nlattr * const cda[])
+ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
 {
-#ifdef CONFIG_NF_CONNTRACK_MARK
        struct ctnetlink_filter *filter;
 
+#ifndef CONFIG_NF_CONNTRACK_MARK
+       if (cda[CTA_MARK] && cda[CTA_MARK_MASK])
+               return ERR_PTR(-EOPNOTSUPP);
+#endif
+
        filter = kzalloc(sizeof(*filter), GFP_KERNEL);
        if (filter == NULL)
                return ERR_PTR(-ENOMEM);
 
-       filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
-       filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+       filter->family = family;
 
-       return filter;
-#else
-       return ERR_PTR(-EOPNOTSUPP);
+#ifdef CONFIG_NF_CONNTRACK_MARK
+       if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
+               filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
+               filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+       }
 #endif
+       return filter;
 }
 
 static int ctnetlink_start(struct netlink_callback *cb)
 {
        const struct nlattr * const *cda = cb->data;
        struct ctnetlink_filter *filter = NULL;
+       struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+       u8 family = nfmsg->nfgen_family;
 
-       if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
-               filter = ctnetlink_alloc_filter(cda);
+       if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
+               filter = ctnetlink_alloc_filter(cda, family);
                if (IS_ERR(filter))
                        return PTR_ERR(filter);
        }
@@ -866,13 +874,24 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
        struct ctnetlink_filter *filter = data;
 
        if (filter == NULL)
-               return 1;
+               goto out;
+
+       /* Match entries of a given L3 protocol number.
+        * If it is not specified, ie. l3proto == 0,
+        * then match everything.
+        */
+       if (filter->family && nf_ct_l3num(ct) != filter->family)
+               goto ignore_entry;
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
-       if ((ct->mark & filter->mark.mask) == filter->mark.val)
-               return 1;
+       if ((ct->mark & filter->mark.mask) != filter->mark.val)
+               goto ignore_entry;
 #endif
 
+out:
+       return 1;
+
+ignore_entry:
        return 0;
 }
 
@@ -883,8 +902,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
        struct nf_conn *ct, *last;
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
-       struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
-       u_int8_t l3proto = nfmsg->nfgen_family;
        struct nf_conn *nf_ct_evict[8];
        int res, i;
        spinlock_t *lockp;
@@ -923,11 +940,6 @@ restart:
                        if (!net_eq(net, nf_ct_net(ct)))
                                continue;
 
-                       /* Dump entries of a given L3 protocol number.
-                        * If it is not specified, ie. l3proto == 0,
-                        * then dump everything. */
-                       if (l3proto && nf_ct_l3num(ct) != l3proto)
-                               continue;
                        if (cb->args[1]) {
                                if (ct != last)
                                        continue;
@@ -1048,7 +1060,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
        tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
 
        rcu_read_lock();
-       l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
+       l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
 
        if (likely(l4proto->nlattr_to_tuple)) {
                ret = nla_validate_nested(attr, CTA_PROTO_MAX,
@@ -1213,12 +1225,12 @@ static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
 
 static int ctnetlink_flush_conntrack(struct net *net,
                                     const struct nlattr * const cda[],
-                                    u32 portid, int report)
+                                    u32 portid, int report, u8 family)
 {
        struct ctnetlink_filter *filter = NULL;
 
-       if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
-               filter = ctnetlink_alloc_filter(cda);
+       if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
+               filter = ctnetlink_alloc_filter(cda, family);
                if (IS_ERR(filter))
                        return PTR_ERR(filter);
        }
@@ -1257,7 +1269,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
        else {
                return ctnetlink_flush_conntrack(net, cda,
                                                 NETLINK_CB(skb).portid,
-                                                nlmsg_report(nlh));
+                                                nlmsg_report(nlh), u3);
        }
 
        if (err < 0)
@@ -1696,7 +1708,7 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
                return err;
 
        rcu_read_lock();
-       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+       l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
        if (l4proto->from_nlattr)
                err = l4proto->from_nlattr(tb, ct);
        rcu_read_unlock();
@@ -2656,8 +2668,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
        rcu_read_lock();
        ret = ctnetlink_dump_tuples_ip(skb, &m);
        if (ret >= 0) {
-               l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
-                                              tuple->dst.protonum);
+               l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
        ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
        }
        rcu_read_unlock();
index 51c5d7eec0a3517518a38cce411bf7b09189c15e..40643af7137e617d7492d8803581f5c39ac6902f 100644 (file)
@@ -43,7 +43,7 @@
 
 extern unsigned int nf_conntrack_net_id;
 
-static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
+static struct nf_conntrack_l4proto __rcu *nf_ct_protos[MAX_NF_CT_PROTO + 1] __read_mostly;
 
 static DEFINE_MUTEX(nf_ct_proto_mutex);
 
@@ -124,23 +124,21 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
 #endif
 
-const struct nf_conntrack_l4proto *
-__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
+const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto)
 {
-       if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
+       if (unlikely(l4proto >= ARRAY_SIZE(nf_ct_protos)))
                return &nf_conntrack_l4proto_generic;
 
-       return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
+       return rcu_dereference(nf_ct_protos[l4proto]);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
 
-const struct nf_conntrack_l4proto *
-nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4num)
 {
        const struct nf_conntrack_l4proto *p;
 
        rcu_read_lock();
-       p = __nf_ct_l4proto_find(l3num, l4num);
+       p = __nf_ct_l4proto_find(l4num);
        if (!try_module_get(p->me))
                p = &nf_conntrack_l4proto_generic;
        rcu_read_unlock();
@@ -159,8 +157,7 @@ static int kill_l4proto(struct nf_conn *i, void *data)
 {
        const struct nf_conntrack_l4proto *l4proto;
        l4proto = data;
-       return nf_ct_protonum(i) == l4proto->l4proto &&
-              nf_ct_l3num(i) == l4proto->l3proto;
+       return nf_ct_protonum(i) == l4proto->l4proto;
 }
 
 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
@@ -219,48 +216,20 @@ int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
 {
        int ret = 0;
 
-       if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos))
-               return -EBUSY;
-
        if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) ||
            (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
                return -EINVAL;
 
        mutex_lock(&nf_ct_proto_mutex);
-       if (!nf_ct_protos[l4proto->l3proto]) {
-               /* l3proto may be loaded latter. */
-               struct nf_conntrack_l4proto __rcu **proto_array;
-               int i;
-
-               proto_array =
-                       kmalloc_array(MAX_NF_CT_PROTO,
-                                     sizeof(struct nf_conntrack_l4proto *),
-                                     GFP_KERNEL);
-               if (proto_array == NULL) {
-                       ret = -ENOMEM;
-                       goto out_unlock;
-               }
-
-               for (i = 0; i < MAX_NF_CT_PROTO; i++)
-                       RCU_INIT_POINTER(proto_array[i],
-                                        &nf_conntrack_l4proto_generic);
-
-               /* Before making proto_array visible to lockless readers,
-                * we must make sure its content is committed to memory.
-                */
-               smp_wmb();
-
-               nf_ct_protos[l4proto->l3proto] = proto_array;
-       } else if (rcu_dereference_protected(
-                       nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+       if (rcu_dereference_protected(
+                       nf_ct_protos[l4proto->l4proto],
                        lockdep_is_held(&nf_ct_proto_mutex)
                        ) != &nf_conntrack_l4proto_generic) {
                ret = -EBUSY;
                goto out_unlock;
        }
 
-       rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
-                          l4proto);
+       rcu_assign_pointer(nf_ct_protos[l4proto->l4proto], l4proto);
 out_unlock:
        mutex_unlock(&nf_ct_proto_mutex);
        return ret;
@@ -274,7 +243,7 @@ int nf_ct_l4proto_pernet_register_one(struct net *net,
        struct nf_proto_net *pn = NULL;
 
        if (l4proto->init_net) {
-               ret = l4proto->init_net(net, l4proto->l3proto);
+               ret = l4proto->init_net(net);
                if (ret < 0)
                        goto out;
        }
@@ -296,13 +265,13 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
 static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
 
 {
-       BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
+       BUG_ON(l4proto->l4proto >= ARRAY_SIZE(nf_ct_protos));
 
        BUG_ON(rcu_dereference_protected(
-                       nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+                       nf_ct_protos[l4proto->l4proto],
                        lockdep_is_held(&nf_ct_proto_mutex)
                        ) != l4proto);
-       rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+       rcu_assign_pointer(nf_ct_protos[l4proto->l4proto],
                           &nf_conntrack_l4proto_generic);
 }
 
@@ -352,7 +321,7 @@ static int
 nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
                       unsigned int num_proto)
 {
-       int ret = -EINVAL, ver;
+       int ret = -EINVAL;
        unsigned int i;
 
        for (i = 0; i < num_proto; i++) {
@@ -361,9 +330,8 @@ nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
                        break;
        }
        if (i != num_proto) {
-               ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
-               pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
-                      ver, l4proto[i]->l4proto);
+               pr_err("nf_conntrack: can't register l4 %d proto.\n",
+                      l4proto[i]->l4proto);
                nf_ct_l4proto_unregister(l4proto, i);
        }
        return ret;
@@ -382,9 +350,8 @@ int nf_ct_l4proto_pernet_register(struct net *net,
                        break;
        }
        if (i != num_proto) {
-               pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
-                      l4proto[i]->l4proto,
-                      l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
+               pr_err("nf_conntrack %d: pernet registration failed\n",
+                      l4proto[i]->l4proto);
                nf_ct_l4proto_pernet_unregister(net, l4proto, i);
        }
        return ret;
@@ -455,7 +422,7 @@ static unsigned int ipv4_conntrack_in(void *priv,
                                      struct sk_buff *skb,
                                      const struct nf_hook_state *state)
 {
-       return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+       return nf_conntrack_in(skb, state);
 }
 
 static unsigned int ipv4_conntrack_local(void *priv,
@@ -477,7 +444,7 @@ static unsigned int ipv4_conntrack_local(void *priv,
                return NF_ACCEPT;
        }
 
-       return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+       return nf_conntrack_in(skb, state);
 }
 
 /* Connection tracking may drop packets, but never alters them, so
@@ -690,14 +657,14 @@ static unsigned int ipv6_conntrack_in(void *priv,
                                      struct sk_buff *skb,
                                      const struct nf_hook_state *state)
 {
-       return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+       return nf_conntrack_in(skb, state);
 }
 
 static unsigned int ipv6_conntrack_local(void *priv,
                                         struct sk_buff *skb,
                                         const struct nf_hook_state *state)
 {
-       return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+       return nf_conntrack_in(skb, state);
 }
 
 static unsigned int ipv6_helper(void *priv,
@@ -911,37 +878,26 @@ void nf_ct_netns_put(struct net *net, uint8_t nfproto)
 EXPORT_SYMBOL_GPL(nf_ct_netns_put);
 
 static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
-       &nf_conntrack_l4proto_tcp4,
-       &nf_conntrack_l4proto_udp4,
+       &nf_conntrack_l4proto_tcp,
+       &nf_conntrack_l4proto_udp,
        &nf_conntrack_l4proto_icmp,
 #ifdef CONFIG_NF_CT_PROTO_DCCP
-       &nf_conntrack_l4proto_dccp4,
+       &nf_conntrack_l4proto_dccp,
 #endif
 #ifdef CONFIG_NF_CT_PROTO_SCTP
-       &nf_conntrack_l4proto_sctp4,
+       &nf_conntrack_l4proto_sctp,
 #endif
 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
-       &nf_conntrack_l4proto_udplite4,
+       &nf_conntrack_l4proto_udplite,
 #endif
 #if IS_ENABLED(CONFIG_IPV6)
-       &nf_conntrack_l4proto_tcp6,
-       &nf_conntrack_l4proto_udp6,
        &nf_conntrack_l4proto_icmpv6,
-#ifdef CONFIG_NF_CT_PROTO_DCCP
-       &nf_conntrack_l4proto_dccp6,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
-       &nf_conntrack_l4proto_sctp6,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-       &nf_conntrack_l4proto_udplite6,
-#endif
 #endif /* CONFIG_IPV6 */
 };
 
 int nf_conntrack_proto_init(void)
 {
-       int ret = 0;
+       int ret = 0, i;
 
        ret = nf_register_sockopt(&so_getorigdst);
        if (ret < 0)
@@ -952,6 +908,11 @@ int nf_conntrack_proto_init(void)
        if (ret < 0)
                goto cleanup_sockopt;
 #endif
+
+       for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
+               RCU_INIT_POINTER(nf_ct_protos[i],
+                                &nf_conntrack_l4proto_generic);
+
        ret = nf_ct_l4proto_register(builtin_l4proto,
                                     ARRAY_SIZE(builtin_l4proto));
        if (ret < 0)
@@ -969,17 +930,10 @@ cleanup_sockopt:
 
 void nf_conntrack_proto_fini(void)
 {
-       unsigned int i;
-
        nf_unregister_sockopt(&so_getorigdst);
 #if IS_ENABLED(CONFIG_IPV6)
        nf_unregister_sockopt(&so_getorigdst6);
 #endif
-       /* No need to call nf_ct_l4proto_unregister(), the register
-        * tables are free'd here anyway.
-        */
-       for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
-               kfree(nf_ct_protos[i]);
 }
 
 int nf_conntrack_proto_pernet_init(struct net *net)
@@ -988,8 +942,7 @@ int nf_conntrack_proto_pernet_init(struct net *net)
        struct nf_proto_net *pn = nf_ct_l4proto_net(net,
                                        &nf_conntrack_l4proto_generic);
 
-       err = nf_conntrack_l4proto_generic.init_net(net,
-                                       nf_conntrack_l4proto_generic.l3proto);
+       err = nf_conntrack_l4proto_generic.init_net(net);
        if (err < 0)
                return err;
        err = nf_ct_l4proto_register_sysctl(net,
index f3f91ed2c21adce5dcc5e9ba06d6a843a6af39bb..171e9e122e5f1e8b8840e41013d86246ba8025b9 100644 (file)
@@ -389,18 +389,15 @@ static inline struct nf_dccp_net *dccp_pernet(struct net *net)
        return &net->ct.nf_ct_proto.dccp;
 }
 
-static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
-                    unsigned int dataoff)
+static noinline bool
+dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
+        const struct dccp_hdr *dh)
 {
        struct net *net = nf_ct_net(ct);
        struct nf_dccp_net *dn;
-       struct dccp_hdr _dh, *dh;
        const char *msg;
        u_int8_t state;
 
-       dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
-       BUG_ON(dh == NULL);
-
        state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
        switch (state) {
        default:
@@ -438,8 +435,51 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh)
                     ntohl(dhack->dccph_ack_nr_low);
 }
 
-static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
-                      unsigned int dataoff, enum ip_conntrack_info ctinfo)
+static bool dccp_error(const struct dccp_hdr *dh,
+                      struct sk_buff *skb, unsigned int dataoff,
+                      const struct nf_hook_state *state)
+{
+       unsigned int dccp_len = skb->len - dataoff;
+       unsigned int cscov;
+       const char *msg;
+
+       if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
+           dh->dccph_doff * 4 > dccp_len) {
+               msg = "nf_ct_dccp: truncated/malformed packet ";
+               goto out_invalid;
+       }
+
+       cscov = dccp_len;
+       if (dh->dccph_cscov) {
+               cscov = (dh->dccph_cscov - 1) * 4;
+               if (cscov > dccp_len) {
+                       msg = "nf_ct_dccp: bad checksum coverage ";
+                       goto out_invalid;
+               }
+       }
+
+       if (state->hook == NF_INET_PRE_ROUTING &&
+           state->net->ct.sysctl_checksum &&
+           nf_checksum_partial(skb, state->hook, dataoff, cscov,
+                               IPPROTO_DCCP, state->pf)) {
+               msg = "nf_ct_dccp: bad checksum ";
+               goto out_invalid;
+       }
+
+       if (dh->dccph_type >= DCCP_PKT_INVALID) {
+               msg = "nf_ct_dccp: reserved packet type ";
+               goto out_invalid;
+       }
+       return false;
+out_invalid:
+       nf_l4proto_log_invalid(skb, state->net, state->pf,
+                              IPPROTO_DCCP, "%s", msg);
+       return true;
+}
+
+static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
+                      unsigned int dataoff, enum ip_conntrack_info ctinfo,
+                      const struct nf_hook_state *state)
 {
        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
        struct dccp_hdr _dh, *dh;
@@ -448,8 +488,15 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
        unsigned int *timeouts;
 
        dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
-       BUG_ON(dh == NULL);
+       if (!dh)
+               return NF_DROP;
+
+       if (dccp_error(dh, skb, dataoff, state))
+               return -NF_ACCEPT;
+
        type = dh->dccph_type;
+       if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh))
+               return -NF_ACCEPT;
 
        if (type == DCCP_PKT_RESET &&
            !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
@@ -527,55 +574,6 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
        return NF_ACCEPT;
 }
 
-static int dccp_error(struct net *net, struct nf_conn *tmpl,
-                     struct sk_buff *skb, unsigned int dataoff,
-                     u_int8_t pf, unsigned int hooknum)
-{
-       struct dccp_hdr _dh, *dh;
-       unsigned int dccp_len = skb->len - dataoff;
-       unsigned int cscov;
-       const char *msg;
-
-       dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
-       if (dh == NULL) {
-               msg = "nf_ct_dccp: short packet ";
-               goto out_invalid;
-       }
-
-       if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
-           dh->dccph_doff * 4 > dccp_len) {
-               msg = "nf_ct_dccp: truncated/malformed packet ";
-               goto out_invalid;
-       }
-
-       cscov = dccp_len;
-       if (dh->dccph_cscov) {
-               cscov = (dh->dccph_cscov - 1) * 4;
-               if (cscov > dccp_len) {
-                       msg = "nf_ct_dccp: bad checksum coverage ";
-                       goto out_invalid;
-               }
-       }
-
-       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-           nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP,
-                               pf)) {
-               msg = "nf_ct_dccp: bad checksum ";
-               goto out_invalid;
-       }
-
-       if (dh->dccph_type >= DCCP_PKT_INVALID) {
-               msg = "nf_ct_dccp: reserved packet type ";
-               goto out_invalid;
-       }
-
-       return NF_ACCEPT;
-
-out_invalid:
-       nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg);
-       return -NF_ACCEPT;
-}
-
 static bool dccp_can_early_drop(const struct nf_conn *ct)
 {
        switch (ct->proto.dccp.state) {
@@ -814,7 +812,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
        return 0;
 }
 
-static int dccp_init_net(struct net *net, u_int16_t proto)
+static int dccp_init_net(struct net *net)
 {
        struct nf_dccp_net *dn = dccp_pernet(net);
        struct nf_proto_net *pn = &dn->pn;
@@ -844,45 +842,9 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
        return &net->ct.nf_ct_proto.dccp.pn;
 }
 
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
-       .l3proto                = AF_INET,
-       .l4proto                = IPPROTO_DCCP,
-       .new                    = dccp_new,
-       .packet                 = dccp_packet,
-       .error                  = dccp_error,
-       .can_early_drop         = dccp_can_early_drop,
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
-       .print_conntrack        = dccp_print_conntrack,
-#endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       .nlattr_size            = DCCP_NLATTR_SIZE,
-       .to_nlattr              = dccp_to_nlattr,
-       .from_nlattr            = nlattr_to_dccp,
-       .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
-       .nlattr_tuple_size      = nf_ct_port_nlattr_tuple_size,
-       .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
-       .nla_policy             = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-       .ctnl_timeout           = {
-               .nlattr_to_obj  = dccp_timeout_nlattr_to_obj,
-               .obj_to_nlattr  = dccp_timeout_obj_to_nlattr,
-               .nlattr_max     = CTA_TIMEOUT_DCCP_MAX,
-               .obj_size       = sizeof(unsigned int) * CT_DCCP_MAX,
-               .nla_policy     = dccp_timeout_nla_policy,
-       },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-       .init_net               = dccp_init_net,
-       .get_net_proto          = dccp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
-       .l3proto                = AF_INET6,
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
        .l4proto                = IPPROTO_DCCP,
-       .new                    = dccp_new,
        .packet                 = dccp_packet,
-       .error                  = dccp_error,
        .can_early_drop         = dccp_can_early_drop,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
        .print_conntrack        = dccp_print_conntrack,
@@ -908,4 +870,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
        .init_net               = dccp_init_net,
        .get_net_proto          = dccp_get_net_proto,
 };
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6);
index 1df3244ecd07fc573538cccb83da67efaa69a5be..e10e867e0b55f3203e8a50d4ac7c884201ac1186 100644 (file)
@@ -44,12 +44,19 @@ static bool generic_pkt_to_tuple(const struct sk_buff *skb,
 
 /* Returns verdict for packet, or -1 for invalid. */
 static int generic_packet(struct nf_conn *ct,
-                         const struct sk_buff *skb,
+                         struct sk_buff *skb,
                          unsigned int dataoff,
-                         enum ip_conntrack_info ctinfo)
+                         enum ip_conntrack_info ctinfo,
+                         const struct nf_hook_state *state)
 {
        const unsigned int *timeout = nf_ct_timeout_lookup(ct);
 
+       if (!nf_generic_should_process(nf_ct_protonum(ct))) {
+               pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
+                            nf_ct_protonum(ct));
+               return -NF_ACCEPT;
+       }
+
        if (!timeout)
                timeout = &generic_pernet(nf_ct_net(ct))->timeout;
 
@@ -57,19 +64,6 @@ static int generic_packet(struct nf_conn *ct,
        return NF_ACCEPT;
 }
 
-/* Called when a new connection for this protocol found. */
-static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
-                       unsigned int dataoff)
-{
-       bool ret;
-
-       ret = nf_generic_should_process(nf_ct_protonum(ct));
-       if (!ret)
-               pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
-                            nf_ct_protonum(ct));
-       return ret;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
@@ -142,7 +136,7 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
        return 0;
 }
 
-static int generic_init_net(struct net *net, u_int16_t proto)
+static int generic_init_net(struct net *net)
 {
        struct nf_generic_net *gn = generic_pernet(net);
        struct nf_proto_net *pn = &gn->pn;
@@ -159,11 +153,9 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net)
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
 {
-       .l3proto                = PF_UNSPEC,
        .l4proto                = 255,
        .pkt_to_tuple           = generic_pkt_to_tuple,
        .packet                 = generic_packet,
-       .new                    = generic_new,
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
        .ctnl_timeout           = {
                .nlattr_to_obj  = generic_timeout_nlattr_to_obj,
index 650eb4fba2c5418951b4d22e62325726a745af22..9b48dc8b4b885a00d8806038fc5fd0948e60cbca 100644 (file)
@@ -233,10 +233,26 @@ static unsigned int *gre_get_timeouts(struct net *net)
 
 /* Returns verdict for packet, and may modify conntrack */
 static int gre_packet(struct nf_conn *ct,
-                     const struct sk_buff *skb,
+                     struct sk_buff *skb,
                      unsigned int dataoff,
-                     enum ip_conntrack_info ctinfo)
+                     enum ip_conntrack_info ctinfo,
+                     const struct nf_hook_state *state)
 {
+       if (state->pf != NFPROTO_IPV4)
+               return -NF_ACCEPT;
+
+       if (!nf_ct_is_confirmed(ct)) {
+               unsigned int *timeouts = nf_ct_timeout_lookup(ct);
+
+               if (!timeouts)
+                       timeouts = gre_get_timeouts(nf_ct_net(ct));
+
+               /* initialize to sane value.  Ideally a conntrack helper
+                * (e.g. in case of pptp) is increasing them */
+               ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED];
+               ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
+       }
+
        /* If we've seen traffic both ways, this is a GRE connection.
         * Extend timeout. */
        if (ct->status & IPS_SEEN_REPLY) {
@@ -252,26 +268,6 @@ static int gre_packet(struct nf_conn *ct,
        return NF_ACCEPT;
 }
 
-/* Called when a new connection for this protocol found. */
-static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
-                   unsigned int dataoff)
-{
-       unsigned int *timeouts = nf_ct_timeout_lookup(ct);
-
-       if (!timeouts)
-               timeouts = gre_get_timeouts(nf_ct_net(ct));
-
-       pr_debug(": ");
-       nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-
-       /* initialize to sane value.  Ideally a conntrack helper
-        * (e.g. in case of pptp) is increasing them */
-       ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED];
-       ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
-
-       return true;
-}
-
 /* Called when a conntrack entry has already been removed from the hashes
  * and is about to be deleted from memory */
 static void gre_destroy(struct nf_conn *ct)
@@ -336,7 +332,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-static int gre_init_net(struct net *net, u_int16_t proto)
+static int gre_init_net(struct net *net)
 {
        struct netns_proto_gre *net_gre = gre_pernet(net);
        int i;
@@ -351,14 +347,12 @@ static int gre_init_net(struct net *net, u_int16_t proto)
 
 /* protocol helper struct */
 static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
-       .l3proto         = AF_INET,
        .l4proto         = IPPROTO_GRE,
        .pkt_to_tuple    = gre_pkt_to_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
        .print_conntrack = gre_print_conntrack,
 #endif
        .packet          = gre_packet,
-       .new             = gre_new,
        .destroy         = gre_destroy,
        .me              = THIS_MODULE,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
index 43c7e1a217b98682a1abd5d3cdb6f63fa1febec2..3598520bd19b7b76dbd91bb42e4b8b91713abf2c 100644 (file)
@@ -72,34 +72,17 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
        return true;
 }
 
-static unsigned int *icmp_get_timeouts(struct net *net)
-{
-       return &icmp_pernet(net)->timeout;
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
 static int icmp_packet(struct nf_conn *ct,
-                      const struct sk_buff *skb,
+                      struct sk_buff *skb,
                       unsigned int dataoff,
-                      enum ip_conntrack_info ctinfo)
+                      enum ip_conntrack_info ctinfo,
+                      const struct nf_hook_state *state)
 {
        /* Do not immediately delete the connection after the first
           successful reply to avoid excessive conntrackd traffic
           and also to handle correctly ICMP echo reply duplicates. */
        unsigned int *timeout = nf_ct_timeout_lookup(ct);
-
-       if (!timeout)
-               timeout = icmp_get_timeouts(nf_ct_net(ct));
-
-       nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
-
-       return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
-                    unsigned int dataoff)
-{
        static const u_int8_t valid_new[] = {
                [ICMP_ECHO] = 1,
                [ICMP_TIMESTAMP] = 1,
@@ -107,21 +90,29 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
                [ICMP_ADDRESS] = 1
        };
 
+       if (state->pf != NFPROTO_IPV4)
+               return -NF_ACCEPT;
+
        if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
            !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
                /* Can't create a new ICMP `conn' with this. */
                pr_debug("icmp: can't create new conn with type %u\n",
                         ct->tuplehash[0].tuple.dst.u.icmp.type);
                nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
-               return false;
+               return -NF_ACCEPT;
        }
-       return true;
+
+       if (!timeout)
+               timeout = &icmp_pernet(nf_ct_net(ct))->timeout;
+
+       nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+       return NF_ACCEPT;
 }
 
 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */
 static int
-icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
-                unsigned int hooknum)
+icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
+                  const struct nf_hook_state *state)
 {
        struct nf_conntrack_tuple innertuple, origtuple;
        const struct nf_conntrack_l4proto *innerproto;
@@ -137,13 +128,13 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
        if (!nf_ct_get_tuplepr(skb,
                               skb_network_offset(skb) + ip_hdrlen(skb)
                                                       + sizeof(struct icmphdr),
-                              PF_INET, net, &origtuple)) {
+                              PF_INET, state->net, &origtuple)) {
                pr_debug("icmp_error_message: failed to get tuple\n");
                return -NF_ACCEPT;
        }
 
        /* rcu_read_lock()ed by nf_hook_thresh */
-       innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
+       innerproto = __nf_ct_l4proto_find(origtuple.dst.protonum);
 
        /* Ordinarily, we'd expect the inverted tupleproto, but it's
           been preserved inside the ICMP. */
@@ -154,7 +145,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 
        ctinfo = IP_CT_RELATED;
 
-       h = nf_conntrack_find_get(net, zone, &innertuple);
+       h = nf_conntrack_find_get(state->net, zone, &innertuple);
        if (!h) {
                pr_debug("icmp_error_message: no match\n");
                return -NF_ACCEPT;
@@ -168,17 +159,18 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
        return NF_ACCEPT;
 }
 
-static void icmp_error_log(const struct sk_buff *skb, struct net *net,
-                          u8 pf, const char *msg)
+static void icmp_error_log(const struct sk_buff *skb,
+                          const struct nf_hook_state *state,
+                          const char *msg)
 {
-       nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+       nf_l4proto_log_invalid(skb, state->net, state->pf,
+                              IPPROTO_ICMP, "%s", msg);
 }
 
 /* Small and modified version of icmp_rcv */
-static int
-icmp_error(struct net *net, struct nf_conn *tmpl,
-          struct sk_buff *skb, unsigned int dataoff,
-          u8 pf, unsigned int hooknum)
+int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
+                             struct sk_buff *skb, unsigned int dataoff,
+                             const struct nf_hook_state *state)
 {
        const struct icmphdr *icmph;
        struct icmphdr _ih;
@@ -186,14 +178,15 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
        /* Not enough header? */
        icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
        if (icmph == NULL) {
-               icmp_error_log(skb, net, pf, "short packet");
+               icmp_error_log(skb, state, "short packet");
                return -NF_ACCEPT;
        }
 
        /* See ip_conntrack_proto_tcp.c */
-       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-           nf_ip_checksum(skb, hooknum, dataoff, 0)) {
-               icmp_error_log(skb, net, pf, "bad hw icmp checksum");
+       if (state->net->ct.sysctl_checksum &&
+           state->hook == NF_INET_PRE_ROUTING &&
+           nf_ip_checksum(skb, state->hook, dataoff, 0)) {
+               icmp_error_log(skb, state, "bad hw icmp checksum");
                return -NF_ACCEPT;
        }
 
@@ -204,7 +197,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
         *                discarded.
         */
        if (icmph->type > NR_ICMP_TYPES) {
-               icmp_error_log(skb, net, pf, "invalid icmp type");
+               icmp_error_log(skb, state, "invalid icmp type");
                return -NF_ACCEPT;
        }
 
@@ -216,7 +209,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
            icmph->type != ICMP_REDIRECT)
                return NF_ACCEPT;
 
-       return icmp_error_message(net, tmpl, skb, hooknum);
+       return icmp_error_message(tmpl, skb, state);
 }
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -342,7 +335,7 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
        return 0;
 }
 
-static int icmp_init_net(struct net *net, u_int16_t proto)
+static int icmp_init_net(struct net *net)
 {
        struct nf_icmp_net *in = icmp_pernet(net);
        struct nf_proto_net *pn = &in->pn;
@@ -359,13 +352,10 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net)
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
 {
-       .l3proto                = PF_INET,
        .l4proto                = IPPROTO_ICMP,
        .pkt_to_tuple           = icmp_pkt_to_tuple,
        .invert_tuple           = icmp_invert_tuple,
        .packet                 = icmp_packet,
-       .new                    = icmp_new,
-       .error                  = icmp_error,
        .destroy                = NULL,
        .me                     = NULL,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
index 97e40f77d678a64204cfdd660b92bb1960752305..378618feed5da7df50e09c8ec4f72618953306b0 100644 (file)
@@ -92,11 +92,31 @@ static unsigned int *icmpv6_get_timeouts(struct net *net)
 
 /* Returns verdict for packet, or -1 for invalid. */
 static int icmpv6_packet(struct nf_conn *ct,
-                      const struct sk_buff *skb,
-                      unsigned int dataoff,
-                      enum ip_conntrack_info ctinfo)
+                        struct sk_buff *skb,
+                        unsigned int dataoff,
+                        enum ip_conntrack_info ctinfo,
+                        const struct nf_hook_state *state)
 {
        unsigned int *timeout = nf_ct_timeout_lookup(ct);
+       static const u8 valid_new[] = {
+               [ICMPV6_ECHO_REQUEST - 128] = 1,
+               [ICMPV6_NI_QUERY - 128] = 1
+       };
+
+       if (state->pf != NFPROTO_IPV6)
+               return -NF_ACCEPT;
+
+       if (!nf_ct_is_confirmed(ct)) {
+               int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
+
+               if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
+                       /* Can't create a new ICMPv6 `conn' with this. */
+                       pr_debug("icmpv6: can't create new conn with type %u\n",
+                                type + 128);
+                       nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+                       return -NF_ACCEPT;
+               }
+       }
 
        if (!timeout)
                timeout = icmpv6_get_timeouts(nf_ct_net(ct));
@@ -109,26 +129,6 @@ static int icmpv6_packet(struct nf_conn *ct,
        return NF_ACCEPT;
 }
 
-/* Called when a new connection for this protocol found. */
-static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
-                      unsigned int dataoff)
-{
-       static const u_int8_t valid_new[] = {
-               [ICMPV6_ECHO_REQUEST - 128] = 1,
-               [ICMPV6_NI_QUERY - 128] = 1
-       };
-       int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
-
-       if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
-               /* Can't create a new ICMPv6 `conn' with this. */
-               pr_debug("icmpv6: can't create new conn with type %u\n",
-                        type + 128);
-               nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
-               return false;
-       }
-       return true;
-}
-
 static int
 icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
                     struct sk_buff *skb,
@@ -153,7 +153,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
        }
 
        /* rcu_read_lock()ed by nf_hook_thresh */
-       inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
+       inproto = __nf_ct_l4proto_find(origtuple.dst.protonum);
 
        /* Ordinarily, we'd expect the inverted tupleproto, but it's
           been preserved inside the ICMP. */
@@ -179,16 +179,18 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
        return NF_ACCEPT;
 }
 
-static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
-                            u8 pf, const char *msg)
+static void icmpv6_error_log(const struct sk_buff *skb,
+                            const struct nf_hook_state *state,
+                            const char *msg)
 {
-       nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
+       nf_l4proto_log_invalid(skb, state->net, state->pf,
+                              IPPROTO_ICMPV6, "%s", msg);
 }
 
-static int
-icmpv6_error(struct net *net, struct nf_conn *tmpl,
-            struct sk_buff *skb, unsigned int dataoff,
-            u8 pf, unsigned int hooknum)
+int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
+                             struct sk_buff *skb,
+                             unsigned int dataoff,
+                             const struct nf_hook_state *state)
 {
        const struct icmp6hdr *icmp6h;
        struct icmp6hdr _ih;
@@ -196,13 +198,14 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 
        icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
        if (icmp6h == NULL) {
-               icmpv6_error_log(skb, net, pf, "short packet");
+               icmpv6_error_log(skb, state, "short packet");
                return -NF_ACCEPT;
        }
 
-       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-           nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
-               icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
+       if (state->hook == NF_INET_PRE_ROUTING &&
+           state->net->ct.sysctl_checksum &&
+           nf_ip6_checksum(skb, state->hook, dataoff, IPPROTO_ICMPV6)) {
+               icmpv6_error_log(skb, state, "ICMPv6 checksum failed");
                return -NF_ACCEPT;
        }
 
@@ -217,7 +220,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
        if (icmp6h->icmp6_type >= 128)
                return NF_ACCEPT;
 
-       return icmpv6_error_message(net, tmpl, skb, dataoff);
+       return icmpv6_error_message(state->net, tmpl, skb, dataoff);
 }
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -343,7 +346,7 @@ static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
        return 0;
 }
 
-static int icmpv6_init_net(struct net *net, u_int16_t proto)
+static int icmpv6_init_net(struct net *net)
 {
        struct nf_icmp_net *in = icmpv6_pernet(net);
        struct nf_proto_net *pn = &in->pn;
@@ -360,13 +363,10 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
 {
-       .l3proto                = PF_INET6,
        .l4proto                = IPPROTO_ICMPV6,
        .pkt_to_tuple           = icmpv6_pkt_to_tuple,
        .invert_tuple           = icmpv6_invert_tuple,
        .packet                 = icmpv6_packet,
-       .new                    = icmpv6_new,
-       .error                  = icmpv6_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
        .tuple_to_nlattr        = icmpv6_tuple_to_nlattr,
        .nlattr_tuple_size      = icmpv6_nlattr_tuple_size,
index e4d738d34cd030fe5b2cd1629d436e3fa0fa4557..3d719d3eb9a38c7709b8d224facdad8820ebded4 100644 (file)
@@ -273,11 +273,100 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
        return sctp_conntracks[dir][i][cur_state];
 }
 
+/* Don't need lock here: this conntrack not in circulation yet */
+static noinline bool
+sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
+        const struct sctphdr *sh, unsigned int dataoff)
+{
+       enum sctp_conntrack new_state;
+       const struct sctp_chunkhdr *sch;
+       struct sctp_chunkhdr _sch;
+       u32 offset, count;
+
+       memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
+       new_state = SCTP_CONNTRACK_MAX;
+       for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) {
+               new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
+                                          SCTP_CONNTRACK_NONE, sch->type);
+
+               /* Invalid: delete conntrack */
+               if (new_state == SCTP_CONNTRACK_NONE ||
+                   new_state == SCTP_CONNTRACK_MAX) {
+                       pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
+                       return false;
+               }
+
+               /* Copy the vtag into the state info */
+               if (sch->type == SCTP_CID_INIT) {
+                       struct sctp_inithdr _inithdr, *ih;
+                       /* Sec 8.5.1 (A) */
+                       if (sh->vtag)
+                               return false;
+
+                       ih = skb_header_pointer(skb, offset + sizeof(_sch),
+                                               sizeof(_inithdr), &_inithdr);
+                       if (!ih)
+                               return false;
+
+                       pr_debug("Setting vtag %x for new conn\n",
+                                ih->init_tag);
+
+                       ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
+               } else if (sch->type == SCTP_CID_HEARTBEAT) {
+                       pr_debug("Setting vtag %x for secondary conntrack\n",
+                                sh->vtag);
+                       ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
+               } else {
+               /* If it is a shutdown ack OOTB packet, we expect a return
+                  shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
+                       pr_debug("Setting vtag %x for new conn OOTB\n",
+                                sh->vtag);
+                       ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
+               }
+
+               ct->proto.sctp.state = new_state;
+       }
+
+       return true;
+}
+
+static bool sctp_error(struct sk_buff *skb,
+                      unsigned int dataoff,
+                      const struct nf_hook_state *state)
+{
+       const struct sctphdr *sh;
+       const char *logmsg;
+
+       if (skb->len < dataoff + sizeof(struct sctphdr)) {
+               logmsg = "nf_ct_sctp: short packet ";
+               goto out_invalid;
+       }
+       if (state->hook == NF_INET_PRE_ROUTING &&
+           state->net->ct.sysctl_checksum &&
+           skb->ip_summed == CHECKSUM_NONE) {
+               if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
+                       logmsg = "nf_ct_sctp: failed to read header ";
+                       goto out_invalid;
+               }
+               sh = (const struct sctphdr *)(skb->data + dataoff);
+               if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
+                       logmsg = "nf_ct_sctp: bad CRC ";
+                       goto out_invalid;
+               }
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+       }
+       return false;
+out_invalid:
+       nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_SCTP, "%s", logmsg);
+       return true;
+}
+
 /* Returns verdict for packet, or -NF_ACCEPT for invalid. */
 static int sctp_packet(struct nf_conn *ct,
-                      const struct sk_buff *skb,
+                      struct sk_buff *skb,
                       unsigned int dataoff,
-                      enum ip_conntrack_info ctinfo)
+                      enum ip_conntrack_info ctinfo,
+                      const struct nf_hook_state *state)
 {
        enum sctp_conntrack new_state, old_state;
        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -289,6 +378,9 @@ static int sctp_packet(struct nf_conn *ct,
        unsigned int *timeouts;
        unsigned long map[256 / sizeof(unsigned long)] = { 0 };
 
+       if (sctp_error(skb, dataoff, state))
+               return -NF_ACCEPT;
+
        sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
        if (sh == NULL)
                goto out;
@@ -296,6 +388,17 @@ static int sctp_packet(struct nf_conn *ct,
        if (do_basic_checks(ct, skb, dataoff, map) != 0)
                goto out;
 
+       if (!nf_ct_is_confirmed(ct)) {
+               /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
+               if (test_bit(SCTP_CID_ABORT, map) ||
+                   test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
+                   test_bit(SCTP_CID_COOKIE_ACK, map))
+                       return -NF_ACCEPT;
+
+               if (!sctp_new(ct, skb, sh, dataoff))
+                       return -NF_ACCEPT;
+       }
+
        /* Check the verification tag (Sec 8.5) */
        if (!test_bit(SCTP_CID_INIT, map) &&
            !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
@@ -397,110 +500,6 @@ out:
        return -NF_ACCEPT;
 }
 
-/* Called when a new connection for this protocol found. */
-static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
-                    unsigned int dataoff)
-{
-       enum sctp_conntrack new_state;
-       const struct sctphdr *sh;
-       struct sctphdr _sctph;
-       const struct sctp_chunkhdr *sch;
-       struct sctp_chunkhdr _sch;
-       u_int32_t offset, count;
-       unsigned long map[256 / sizeof(unsigned long)] = { 0 };
-
-       sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
-       if (sh == NULL)
-               return false;
-
-       if (do_basic_checks(ct, skb, dataoff, map) != 0)
-               return false;
-
-       /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
-       if (test_bit(SCTP_CID_ABORT, map) ||
-           test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
-           test_bit(SCTP_CID_COOKIE_ACK, map))
-               return false;
-
-       memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
-       new_state = SCTP_CONNTRACK_MAX;
-       for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
-               /* Don't need lock here: this conntrack not in circulation yet */
-               new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
-                                          SCTP_CONNTRACK_NONE, sch->type);
-
-               /* Invalid: delete conntrack */
-               if (new_state == SCTP_CONNTRACK_NONE ||
-                   new_state == SCTP_CONNTRACK_MAX) {
-                       pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
-                       return false;
-               }
-
-               /* Copy the vtag into the state info */
-               if (sch->type == SCTP_CID_INIT) {
-                       struct sctp_inithdr _inithdr, *ih;
-                       /* Sec 8.5.1 (A) */
-                       if (sh->vtag)
-                               return false;
-
-                       ih = skb_header_pointer(skb, offset + sizeof(_sch),
-                                               sizeof(_inithdr), &_inithdr);
-                       if (!ih)
-                               return false;
-
-                       pr_debug("Setting vtag %x for new conn\n",
-                                ih->init_tag);
-
-                       ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
-               } else if (sch->type == SCTP_CID_HEARTBEAT) {
-                       pr_debug("Setting vtag %x for secondary conntrack\n",
-                                sh->vtag);
-                       ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
-               }
-               /* If it is a shutdown ack OOTB packet, we expect a return
-                  shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
-               else {
-                       pr_debug("Setting vtag %x for new conn OOTB\n",
-                                sh->vtag);
-                       ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
-               }
-
-               ct->proto.sctp.state = new_state;
-       }
-
-       return true;
-}
-
-static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
-                     unsigned int dataoff,
-                     u8 pf, unsigned int hooknum)
-{
-       const struct sctphdr *sh;
-       const char *logmsg;
-
-       if (skb->len < dataoff + sizeof(struct sctphdr)) {
-               logmsg = "nf_ct_sctp: short packet ";
-               goto out_invalid;
-       }
-       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-           skb->ip_summed == CHECKSUM_NONE) {
-               if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
-                       logmsg = "nf_ct_sctp: failed to read header ";
-                       goto out_invalid;
-               }
-               sh = (const struct sctphdr *)(skb->data + dataoff);
-               if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
-                       logmsg = "nf_ct_sctp: bad CRC ";
-                       goto out_invalid;
-               }
-               skb->ip_summed = CHECKSUM_UNNECESSARY;
-       }
-       return NF_ACCEPT;
-out_invalid:
-       nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg);
-       return -NF_ACCEPT;
-}
-
 static bool sctp_can_early_drop(const struct nf_conn *ct)
 {
        switch (ct->proto.sctp.state) {
@@ -735,7 +734,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
        return 0;
 }
 
-static int sctp_init_net(struct net *net, u_int16_t proto)
+static int sctp_init_net(struct net *net)
 {
        struct nf_sctp_net *sn = sctp_pernet(net);
        struct nf_proto_net *pn = &sn->pn;
@@ -760,49 +759,12 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
        return &net->ct.nf_ct_proto.sctp.pn;
 }
 
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
-       .l3proto                = PF_INET,
-       .l4proto                = IPPROTO_SCTP,
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
-       .print_conntrack        = sctp_print_conntrack,
-#endif
-       .packet                 = sctp_packet,
-       .new                    = sctp_new,
-       .error                  = sctp_error,
-       .can_early_drop         = sctp_can_early_drop,
-       .me                     = THIS_MODULE,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       .nlattr_size            = SCTP_NLATTR_SIZE,
-       .to_nlattr              = sctp_to_nlattr,
-       .from_nlattr            = nlattr_to_sctp,
-       .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
-       .nlattr_tuple_size      = nf_ct_port_nlattr_tuple_size,
-       .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
-       .nla_policy             = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-       .ctnl_timeout           = {
-               .nlattr_to_obj  = sctp_timeout_nlattr_to_obj,
-               .obj_to_nlattr  = sctp_timeout_obj_to_nlattr,
-               .nlattr_max     = CTA_TIMEOUT_SCTP_MAX,
-               .obj_size       = sizeof(unsigned int) * SCTP_CONNTRACK_MAX,
-               .nla_policy     = sctp_timeout_nla_policy,
-       },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-       .init_net               = sctp_init_net,
-       .get_net_proto          = sctp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
-       .l3proto                = PF_INET6,
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = {
        .l4proto                = IPPROTO_SCTP,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
        .print_conntrack        = sctp_print_conntrack,
 #endif
        .packet                 = sctp_packet,
-       .new                    = sctp_new,
-       .error                  = sctp_error,
        .can_early_drop         = sctp_can_early_drop,
        .me                     = THIS_MODULE,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -826,4 +788,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
        .init_net               = sctp_init_net,
        .get_net_proto          = sctp_get_net_proto,
 };
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp6);
index 247b89784a6fb41141bb20cc4d4e5987b33e17a7..1bcf9984d45e8601646cb2b99dc5f3113a5c8b0a 100644 (file)
@@ -717,35 +717,26 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
        [TCPHDR_ACK|TCPHDR_URG]                 = 1,
 };
 
-static void tcp_error_log(const struct sk_buff *skb, struct net *net,
-                         u8 pf, const char *msg)
+static void tcp_error_log(const struct sk_buff *skb,
+                         const struct nf_hook_state *state,
+                         const char *msg)
 {
-       nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
+       nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_TCP, "%s", msg);
 }
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
-static int tcp_error(struct net *net, struct nf_conn *tmpl,
-                    struct sk_buff *skb,
-                    unsigned int dataoff,
-                    u_int8_t pf,
-                    unsigned int hooknum)
+static bool tcp_error(const struct tcphdr *th,
+                     struct sk_buff *skb,
+                     unsigned int dataoff,
+                     const struct nf_hook_state *state)
 {
-       const struct tcphdr *th;
-       struct tcphdr _tcph;
        unsigned int tcplen = skb->len - dataoff;
-       u_int8_t tcpflags;
-
-       /* Smaller that minimal TCP header? */
-       th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
-       if (th == NULL) {
-               tcp_error_log(skb, net, pf, "short packet");
-               return -NF_ACCEPT;
-       }
+       u8 tcpflags;
 
        /* Not whole TCP header or malformed packet */
        if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
-               tcp_error_log(skb, net, pf, "truncated packet");
-               return -NF_ACCEPT;
+               tcp_error_log(skb, state, "truncated packet");
+               return true;
        }
 
        /* Checksum invalid? Ignore.
@@ -753,27 +744,101 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
         * because the checksum is assumed to be correct.
         */
        /* FIXME: Source route IP option packets --RR */
-       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-           nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
-               tcp_error_log(skb, net, pf, "bad checksum");
-               return -NF_ACCEPT;
+       if (state->net->ct.sysctl_checksum &&
+           state->hook == NF_INET_PRE_ROUTING &&
+           nf_checksum(skb, state->hook, dataoff, IPPROTO_TCP, state->pf)) {
+               tcp_error_log(skb, state, "bad checksum");
+               return true;
        }
 
        /* Check TCP flags. */
        tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
        if (!tcp_valid_flags[tcpflags]) {
-               tcp_error_log(skb, net, pf, "invalid tcp flag combination");
-               return -NF_ACCEPT;
+               tcp_error_log(skb, state, "invalid tcp flag combination");
+               return true;
        }
 
-       return NF_ACCEPT;
+       return false;
+}
+
+static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
+                            unsigned int dataoff,
+                            const struct tcphdr *th)
+{
+       enum tcp_conntrack new_state;
+       struct net *net = nf_ct_net(ct);
+       const struct nf_tcp_net *tn = tcp_pernet(net);
+       const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
+       const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
+
+       /* Don't need lock here: this conntrack not in circulation yet */
+       new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
+
+       /* Invalid: delete conntrack */
+       if (new_state >= TCP_CONNTRACK_MAX) {
+               pr_debug("nf_ct_tcp: invalid new deleting.\n");
+               return false;
+       }
+
+       if (new_state == TCP_CONNTRACK_SYN_SENT) {
+               memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
+               /* SYN packet */
+               ct->proto.tcp.seen[0].td_end =
+                       segment_seq_plus_len(ntohl(th->seq), skb->len,
+                                            dataoff, th);
+               ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+               if (ct->proto.tcp.seen[0].td_maxwin == 0)
+                       ct->proto.tcp.seen[0].td_maxwin = 1;
+               ct->proto.tcp.seen[0].td_maxend =
+                       ct->proto.tcp.seen[0].td_end;
+
+               tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
+       } else if (tn->tcp_loose == 0) {
+               /* Don't try to pick up connections. */
+               return false;
+       } else {
+               memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
+               /*
+                * We are in the middle of a connection,
+                * its history is lost for us.
+                * Let's try to use the data from the packet.
+                */
+               ct->proto.tcp.seen[0].td_end =
+                       segment_seq_plus_len(ntohl(th->seq), skb->len,
+                                            dataoff, th);
+               ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+               if (ct->proto.tcp.seen[0].td_maxwin == 0)
+                       ct->proto.tcp.seen[0].td_maxwin = 1;
+               ct->proto.tcp.seen[0].td_maxend =
+                       ct->proto.tcp.seen[0].td_end +
+                       ct->proto.tcp.seen[0].td_maxwin;
+
+               /* We assume SACK and liberal window checking to handle
+                * window scaling */
+               ct->proto.tcp.seen[0].flags =
+               ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
+                                             IP_CT_TCP_FLAG_BE_LIBERAL;
+       }
+
+       /* tcp_packet will set them */
+       ct->proto.tcp.last_index = TCP_NONE_SET;
+
+       pr_debug("%s: sender end=%u maxend=%u maxwin=%u scale=%i "
+                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+                __func__,
+                sender->td_end, sender->td_maxend, sender->td_maxwin,
+                sender->td_scale,
+                receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+                receiver->td_scale);
+       return true;
 }
 
 /* Returns verdict for packet, or -1 for invalid. */
 static int tcp_packet(struct nf_conn *ct,
-                     const struct sk_buff *skb,
+                     struct sk_buff *skb,
                      unsigned int dataoff,
-                     enum ip_conntrack_info ctinfo)
+                     enum ip_conntrack_info ctinfo,
+                     const struct nf_hook_state *state)
 {
        struct net *net = nf_ct_net(ct);
        struct nf_tcp_net *tn = tcp_pernet(net);
@@ -786,7 +851,14 @@ static int tcp_packet(struct nf_conn *ct,
        unsigned long timeout;
 
        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
-       BUG_ON(th == NULL);
+       if (th == NULL)
+               return -NF_ACCEPT;
+
+       if (tcp_error(th, skb, dataoff, state))
+               return -NF_ACCEPT;
+
+       if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th))
+               return -NF_ACCEPT;
 
        spin_lock_bh(&ct->lock);
        old_state = ct->proto.tcp.state;
@@ -1067,82 +1139,6 @@ static int tcp_packet(struct nf_conn *ct,
        return NF_ACCEPT;
 }
 
-/* Called when a new connection for this protocol found. */
-static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
-                   unsigned int dataoff)
-{
-       enum tcp_conntrack new_state;
-       const struct tcphdr *th;
-       struct tcphdr _tcph;
-       struct net *net = nf_ct_net(ct);
-       struct nf_tcp_net *tn = tcp_pernet(net);
-       const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
-       const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
-
-       th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
-       BUG_ON(th == NULL);
-
-       /* Don't need lock here: this conntrack not in circulation yet */
-       new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
-
-       /* Invalid: delete conntrack */
-       if (new_state >= TCP_CONNTRACK_MAX) {
-               pr_debug("nf_ct_tcp: invalid new deleting.\n");
-               return false;
-       }
-
-       if (new_state == TCP_CONNTRACK_SYN_SENT) {
-               memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
-               /* SYN packet */
-               ct->proto.tcp.seen[0].td_end =
-                       segment_seq_plus_len(ntohl(th->seq), skb->len,
-                                            dataoff, th);
-               ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-               if (ct->proto.tcp.seen[0].td_maxwin == 0)
-                       ct->proto.tcp.seen[0].td_maxwin = 1;
-               ct->proto.tcp.seen[0].td_maxend =
-                       ct->proto.tcp.seen[0].td_end;
-
-               tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
-       } else if (tn->tcp_loose == 0) {
-               /* Don't try to pick up connections. */
-               return false;
-       } else {
-               memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
-               /*
-                * We are in the middle of a connection,
-                * its history is lost for us.
-                * Let's try to use the data from the packet.
-                */
-               ct->proto.tcp.seen[0].td_end =
-                       segment_seq_plus_len(ntohl(th->seq), skb->len,
-                                            dataoff, th);
-               ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-               if (ct->proto.tcp.seen[0].td_maxwin == 0)
-                       ct->proto.tcp.seen[0].td_maxwin = 1;
-               ct->proto.tcp.seen[0].td_maxend =
-                       ct->proto.tcp.seen[0].td_end +
-                       ct->proto.tcp.seen[0].td_maxwin;
-
-               /* We assume SACK and liberal window checking to handle
-                * window scaling */
-               ct->proto.tcp.seen[0].flags =
-               ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
-                                             IP_CT_TCP_FLAG_BE_LIBERAL;
-       }
-
-       /* tcp_packet will set them */
-       ct->proto.tcp.last_index = TCP_NONE_SET;
-
-       pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
-                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-                sender->td_end, sender->td_maxend, sender->td_maxwin,
-                sender->td_scale,
-                receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-                receiver->td_scale);
-       return true;
-}
-
 static bool tcp_can_early_drop(const struct nf_conn *ct)
 {
        switch (ct->proto.tcp.state) {
@@ -1510,7 +1506,7 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
        return 0;
 }
 
-static int tcp_init_net(struct net *net, u_int16_t proto)
+static int tcp_init_net(struct net *net)
 {
        struct nf_tcp_net *tn = tcp_pernet(net);
        struct nf_proto_net *pn = &tn->pn;
@@ -1538,16 +1534,13 @@ static struct nf_proto_net *tcp_get_net_proto(struct net *net)
        return &net->ct.nf_ct_proto.tcp.pn;
 }
 
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
 {
-       .l3proto                = PF_INET,
        .l4proto                = IPPROTO_TCP,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
        .print_conntrack        = tcp_print_conntrack,
 #endif
        .packet                 = tcp_packet,
-       .new                    = tcp_new,
-       .error                  = tcp_error,
        .can_early_drop         = tcp_can_early_drop,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
        .to_nlattr              = tcp_to_nlattr,
@@ -1571,39 +1564,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
        .init_net               = tcp_init_net,
        .get_net_proto          = tcp_get_net_proto,
 };
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
-{
-       .l3proto                = PF_INET6,
-       .l4proto                = IPPROTO_TCP,
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
-       .print_conntrack        = tcp_print_conntrack,
-#endif
-       .packet                 = tcp_packet,
-       .new                    = tcp_new,
-       .error                  = tcp_error,
-       .can_early_drop         = tcp_can_early_drop,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       .nlattr_size            = TCP_NLATTR_SIZE,
-       .to_nlattr              = tcp_to_nlattr,
-       .from_nlattr            = nlattr_to_tcp,
-       .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
-       .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
-       .nlattr_tuple_size      = tcp_nlattr_tuple_size,
-       .nla_policy             = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-       .ctnl_timeout           = {
-               .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
-               .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
-               .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
-               .obj_size       = sizeof(unsigned int) *
-                                       TCP_CONNTRACK_TIMEOUT_MAX,
-               .nla_policy     = tcp_timeout_nla_policy,
-       },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-       .init_net               = tcp_init_net,
-       .get_net_proto          = tcp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
index 3065fb8ef91b74348f95900a7b3b0cfc92970469..a7aa70370913ce7e8914343270152fb009eb2a63 100644 (file)
@@ -42,14 +42,65 @@ static unsigned int *udp_get_timeouts(struct net *net)
        return udp_pernet(net)->timeouts;
 }
 
+/* Report an invalid UDP packet through nf_l4proto_log_invalid(), using the
+ * network namespace and protocol family carried in the hook state.
+ * @msg is handed over as a "%s" argument, so it is never interpreted as a
+ * format string.
+ */
+static void udp_error_log(const struct sk_buff *skb,
+                         const struct nf_hook_state *state,
+                         const char *msg)
+{
+       nf_l4proto_log_invalid(skb, state->net, state->pf,
+                              IPPROTO_UDP, "%s", msg);
+}
+
+/* Sanity-check the UDP header found at @dataoff in @skb.
+ *
+ * Returns true when the packet must be treated as invalid (header cannot
+ * be read, length field inconsistent, or checksum verification failed);
+ * every such case is logged via udp_error_log().  Returns false when the
+ * packet passed all checks that were performed.
+ */
+static bool udp_error(struct sk_buff *skb,
+                     unsigned int dataoff,
+                     const struct nf_hook_state *state)
+{
+       /* Number of bytes from @dataoff up to the end of the skb. */
+       unsigned int udplen = skb->len - dataoff;
+       const struct udphdr *hdr;
+       struct udphdr _hdr;
+
+       /* Header is too small? */
+       hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+       if (!hdr) {
+               udp_error_log(skb, state, "short packet");
+               return true;
+       }
+
+       /* Truncated/malformed packets: the length field must cover at
+        * least the header and must not exceed what the skb holds. */
+       if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
+               udp_error_log(skb, state, "truncated/malformed packet");
+               return true;
+       }
+
+       /* Packet with no checksum: nothing to verify, not an error. */
+       if (!hdr->check)
+               return false;
+
+       /* Checksum invalid? Ignore.
+        * We skip checking packets on the outgoing path
+        * because the checksum is assumed to be correct.
+        * Verification only runs in the PRE_ROUTING hook and only when
+        * the per-netns sysctl_checksum setting is non-zero.
+        * FIXME: Source route IP option packets --RR */
+       if (state->hook == NF_INET_PRE_ROUTING &&
+           state->net->ct.sysctl_checksum &&
+           nf_checksum(skb, state->hook, dataoff, IPPROTO_UDP, state->pf)) {
+               udp_error_log(skb, state, "bad checksum");
+               return true;
+       }
+
+       return false;
+}
+
 /* Returns verdict for packet, and may modify conntracktype */
 static int udp_packet(struct nf_conn *ct,
-                     const struct sk_buff *skb,
+                     struct sk_buff *skb,
                      unsigned int dataoff,
-                     enum ip_conntrack_info ctinfo)
+                     enum ip_conntrack_info ctinfo,
+                     const struct nf_hook_state *state)
 {
        unsigned int *timeouts;
 
+       if (udp_error(skb, dataoff, state))
+               return -NF_ACCEPT;
+
        timeouts = nf_ct_timeout_lookup(ct);
        if (!timeouts)
                timeouts = udp_get_timeouts(nf_ct_net(ct));
@@ -69,24 +120,18 @@ static int udp_packet(struct nf_conn *ct,
        return NF_ACCEPT;
 }
 
-/* Called when a new connection for this protocol found. */
-static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
-                   unsigned int dataoff)
-{
-       return true;
-}
-
 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
-static void udplite_error_log(const struct sk_buff *skb, struct net *net,
-                             u8 pf, const char *msg)
+static void udplite_error_log(const struct sk_buff *skb,
+                             const struct nf_hook_state *state,
+                             const char *msg)
 {
-       nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg);
+       nf_l4proto_log_invalid(skb, state->net, state->pf,
+                              IPPROTO_UDPLITE, "%s", msg);
 }
 
-static int udplite_error(struct net *net, struct nf_conn *tmpl,
-                        struct sk_buff *skb,
-                        unsigned int dataoff,
-                        u8 pf, unsigned int hooknum)
+static bool udplite_error(struct sk_buff *skb,
+                         unsigned int dataoff,
+                         const struct nf_hook_state *state)
 {
        unsigned int udplen = skb->len - dataoff;
        const struct udphdr *hdr;
@@ -96,80 +141,67 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
        /* Header is too small? */
        hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
        if (!hdr) {
-               udplite_error_log(skb, net, pf, "short packet");
-               return -NF_ACCEPT;
+               udplite_error_log(skb, state, "short packet");
+               return true;
        }
 
        cscov = ntohs(hdr->len);
        if (cscov == 0) {
                cscov = udplen;
        } else if (cscov < sizeof(*hdr) || cscov > udplen) {
-               udplite_error_log(skb, net, pf, "invalid checksum coverage");
-               return -NF_ACCEPT;
+               udplite_error_log(skb, state, "invalid checksum coverage");
+               return true;
        }
 
        /* UDPLITE mandates checksums */
        if (!hdr->check) {
-               udplite_error_log(skb, net, pf, "checksum missing");
-               return -NF_ACCEPT;
+               udplite_error_log(skb, state, "checksum missing");
+               return true;
        }
 
        /* Checksum invalid? Ignore. */
-       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-           nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
-                               pf)) {
-               udplite_error_log(skb, net, pf, "bad checksum");
-               return -NF_ACCEPT;
+       if (state->hook == NF_INET_PRE_ROUTING &&
+           state->net->ct.sysctl_checksum &&
+           nf_checksum_partial(skb, state->hook, dataoff, cscov, IPPROTO_UDP,
+                               state->pf)) {
+               udplite_error_log(skb, state, "bad checksum");
+               return true;
        }
 
-       return NF_ACCEPT;
-}
-#endif
-
-static void udp_error_log(const struct sk_buff *skb, struct net *net,
-                         u8 pf, const char *msg)
-{
-       nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg);
+       return false;
 }
 
-static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
-                    unsigned int dataoff,
-                    u_int8_t pf,
-                    unsigned int hooknum)
+/* Returns verdict for packet, and may modify conntracktype */
+static int udplite_packet(struct nf_conn *ct,
+                         struct sk_buff *skb,
+                         unsigned int dataoff,
+                         enum ip_conntrack_info ctinfo,
+                         const struct nf_hook_state *state)
 {
-       unsigned int udplen = skb->len - dataoff;
-       const struct udphdr *hdr;
-       struct udphdr _hdr;
-
-       /* Header is too small? */
-       hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
-       if (hdr == NULL) {
-               udp_error_log(skb, net, pf, "short packet");
-               return -NF_ACCEPT;
-       }
+       unsigned int *timeouts;
 
-       /* Truncated/malformed packets */
-       if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
-               udp_error_log(skb, net, pf, "truncated/malformed packet");
+       if (udplite_error(skb, dataoff, state))
                return -NF_ACCEPT;
-       }
 
-       /* Packet with no checksum */
-       if (!hdr->check)
-               return NF_ACCEPT;
+       timeouts = nf_ct_timeout_lookup(ct);
+       if (!timeouts)
+               timeouts = udp_get_timeouts(nf_ct_net(ct));
 
-       /* Checksum invalid? Ignore.
-        * We skip checking packets on the outgoing path
-        * because the checksum is assumed to be correct.
-        * FIXME: Source route IP option packets --RR */
-       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-           nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
-               udp_error_log(skb, net, pf, "bad checksum");
-               return -NF_ACCEPT;
+       /* If we've seen traffic both ways, this is some kind of UDP
+          stream.  Extend timeout. */
+       if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+               nf_ct_refresh_acct(ct, ctinfo, skb,
+                                  timeouts[UDP_CT_REPLIED]);
+               /* Also, more likely to be important, and not a probe */
+               if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+                       nf_conntrack_event_cache(IPCT_ASSURED, ct);
+       } else {
+               nf_ct_refresh_acct(ct, ctinfo, skb,
+                                  timeouts[UDP_CT_UNREPLIED]);
        }
-
        return NF_ACCEPT;
 }
+#endif
 
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
@@ -258,7 +290,7 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
        return 0;
 }
 
-static int udp_init_net(struct net *net, u_int16_t proto)
+static int udp_init_net(struct net *net)
 {
        struct nf_udp_net *un = udp_pernet(net);
        struct nf_proto_net *pn = &un->pn;
@@ -278,72 +310,11 @@ static struct nf_proto_net *udp_get_net_proto(struct net *net)
        return &net->ct.nf_ct_proto.udp.pn;
 }
 
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
-{
-       .l3proto                = PF_INET,
-       .l4proto                = IPPROTO_UDP,
-       .allow_clash            = true,
-       .packet                 = udp_packet,
-       .new                    = udp_new,
-       .error                  = udp_error,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
-       .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
-       .nlattr_tuple_size      = nf_ct_port_nlattr_tuple_size,
-       .nla_policy             = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-       .ctnl_timeout           = {
-               .nlattr_to_obj  = udp_timeout_nlattr_to_obj,
-               .obj_to_nlattr  = udp_timeout_obj_to_nlattr,
-               .nlattr_max     = CTA_TIMEOUT_UDP_MAX,
-               .obj_size       = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
-               .nla_policy     = udp_timeout_nla_policy,
-       },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-       .init_net               = udp_init_net,
-       .get_net_proto          = udp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4);
-
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
-{
-       .l3proto                = PF_INET,
-       .l4proto                = IPPROTO_UDPLITE,
-       .allow_clash            = true,
-       .packet                 = udp_packet,
-       .new                    = udp_new,
-       .error                  = udplite_error,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
-       .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
-       .nlattr_tuple_size      = nf_ct_port_nlattr_tuple_size,
-       .nla_policy             = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-       .ctnl_timeout           = {
-               .nlattr_to_obj  = udp_timeout_nlattr_to_obj,
-               .obj_to_nlattr  = udp_timeout_obj_to_nlattr,
-               .nlattr_max     = CTA_TIMEOUT_UDP_MAX,
-               .obj_size       = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
-               .nla_policy     = udp_timeout_nla_policy,
-       },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-       .init_net               = udp_init_net,
-       .get_net_proto          = udp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4);
-#endif
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
 {
-       .l3proto                = PF_INET6,
        .l4proto                = IPPROTO_UDP,
        .allow_clash            = true,
        .packet                 = udp_packet,
-       .new                    = udp_new,
-       .error                  = udp_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
        .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
        .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
@@ -362,17 +333,13 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
        .init_net               = udp_init_net,
        .get_net_proto          = udp_get_net_proto,
 };
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6);
 
 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite =
 {
-       .l3proto                = PF_INET6,
        .l4proto                = IPPROTO_UDPLITE,
        .allow_clash            = true,
-       .packet                 = udp_packet,
-       .new                    = udp_new,
-       .error                  = udplite_error,
+       .packet                 = udplite_packet,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
        .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
        .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
@@ -391,5 +358,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
        .init_net               = udp_init_net,
        .get_net_proto          = udp_get_net_proto,
 };
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
 #endif
index 13279f683da9786f3b6fea88b96ba96bc07f62c5..463d17d349c1bca02361fbb625059f93b34e3bc0 100644 (file)
@@ -292,7 +292,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
        if (!net_eq(nf_ct_net(ct), net))
                goto release;
 
-       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+       l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
        WARN_ON(!l4proto);
 
        ret = -ENOSPC;
@@ -720,10 +720,3 @@ static void __exit nf_conntrack_standalone_fini(void)
 
 module_init(nf_conntrack_standalone_init);
 module_exit(nf_conntrack_standalone_fini);
-
-/* Some modules need us, but don't depend directly on any symbol.
-   They should call this. */
-void need_conntrack(void)
-{
-}
-EXPORT_SYMBOL_GPL(need_conntrack);
index d8125616edc79dd311c12dd1806c97dd4e24f4e4..185c633b6872b1cf9b04b76f09b14ca2b4a3c5ea 100644 (file)
@@ -120,7 +120,7 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
        if (l4num == IPPROTO_TCP)
                flow_offload_fixup_tcp(&ct->proto.tcp);
 
-       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
+       l4proto = __nf_ct_l4proto_find(l4num);
        if (!l4proto)
                return;
 
@@ -233,8 +233,8 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
        struct flow_offload *flow;
        int dir;
 
-       tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
-                                          nf_flow_offload_rhash_params);
+       tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
+                                     nf_flow_offload_rhash_params);
        if (!tuplehash)
                return NULL;
 
@@ -254,20 +254,17 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
        struct flow_offload_tuple_rhash *tuplehash;
        struct rhashtable_iter hti;
        struct flow_offload *flow;
-       int err;
-
-       err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
-       if (err)
-               return err;
+       int err = 0;
 
+       rhashtable_walk_enter(&flow_table->rhashtable, &hti);
        rhashtable_walk_start(&hti);
 
        while ((tuplehash = rhashtable_walk_next(&hti))) {
                if (IS_ERR(tuplehash)) {
-                       err = PTR_ERR(tuplehash);
-                       if (err != -EAGAIN)
-                               goto out;
-
+                       if (PTR_ERR(tuplehash) != -EAGAIN) {
+                               err = PTR_ERR(tuplehash);
+                               break;
+                       }
                        continue;
                }
                if (tuplehash->tuple.dir)
@@ -277,7 +274,6 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
 
                iter(flow, data);
        }
-out:
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);
 
@@ -290,25 +286,19 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
        return (__s32)(flow->timeout - (u32)jiffies) <= 0;
 }
 
-static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
+static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
 {
        struct flow_offload_tuple_rhash *tuplehash;
        struct rhashtable_iter hti;
        struct flow_offload *flow;
-       int err;
-
-       err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
-       if (err)
-               return 0;
 
+       rhashtable_walk_enter(&flow_table->rhashtable, &hti);
        rhashtable_walk_start(&hti);
 
        while ((tuplehash = rhashtable_walk_next(&hti))) {
                if (IS_ERR(tuplehash)) {
-                       err = PTR_ERR(tuplehash);
-                       if (err != -EAGAIN)
-                               goto out;
-
+                       if (PTR_ERR(tuplehash) != -EAGAIN)
+                               break;
                        continue;
                }
                if (tuplehash->tuple.dir)
@@ -321,11 +311,8 @@ static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
                                    FLOW_OFFLOAD_TEARDOWN)))
                        flow_offload_del(flow_table, flow);
        }
-out:
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);
-
-       return 1;
 }
 
 static void nf_flow_offload_work_gc(struct work_struct *work)
@@ -514,7 +501,7 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
        mutex_unlock(&flowtable_lock);
        cancel_delayed_work_sync(&flow_table->gc_work);
        nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
-       WARN_ON(!nf_flow_offload_gc_step(flow_table));
+       nf_flow_offload_gc_step(flow_table);
        rhashtable_destroy(&flow_table->rhashtable);
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_free);
index 15ed91309992e85121f0eb4c3ad01d5be2bdd194..1d291a51cd45b74e5f70b34c304e519f2c41875b 100644 (file)
@@ -254,8 +254,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
        if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
                return NF_ACCEPT;
 
-       if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
-           nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
+       if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
                return NF_DROP;
 
        flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
@@ -471,8 +470,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
        if (skb_try_make_writable(skb, sizeof(*ip6h)))
                return NF_DROP;
 
-       if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
-           nf_flow_nat_ipv6(flow, skb, dir) < 0)
+       if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
                return NF_DROP;
 
        flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
index 99606baedda4903dc4fa360ac63d28fad1109b7e..38793b95d9bca6b6aa884bda1d2590b2824e1060 100644 (file)
@@ -37,7 +37,7 @@ static void mangle_contents(struct sk_buff *skb,
 {
        unsigned char *data;
 
-       BUG_ON(skb_is_nonlinear(skb));
+       SKB_LINEAR_ASSERT(skb);
        data = skb_network_header(skb) + dataoff;
 
        /* move post-replacement */
@@ -110,8 +110,6 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
            !enlarge_skb(skb, rep_len - match_len))
                return false;
 
-       SKB_LINEAR_ASSERT(skb);
-
        tcph = (void *)skb->data + protoff;
 
        oldlen = skb->len - protoff;
index adee04af8d43f519402c20b4f1a8bd11929a2159..78a9e6454ff3d712926397beb904b478b8fab0f1 100644 (file)
@@ -52,13 +52,11 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
 
                newdst = 0;
 
-               rcu_read_lock();
                indev = __in_dev_get_rcu(skb->dev);
                if (indev && indev->ifa_list) {
                        ifa = indev->ifa_list;
                        newdst = ifa->ifa_local;
                }
-               rcu_read_unlock();
 
                if (!newdst)
                        return NF_DROP;
@@ -97,7 +95,6 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
                struct inet6_ifaddr *ifa;
                bool addr = false;
 
-               rcu_read_lock();
                idev = __in6_dev_get(skb->dev);
                if (idev != NULL) {
                        read_lock_bh(&idev->lock);
@@ -108,7 +105,6 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
                        }
                        read_unlock_bh(&idev->lock);
                }
-               rcu_read_unlock();
 
                if (!addr)
                        return NF_DROP;
index 2cfb173cd0b2d8a5e99e5165edcecffcd610af33..f0159eea29780ed93419bce343ec05094691c2a6 100644 (file)
@@ -27,6 +27,8 @@
 static LIST_HEAD(nf_tables_expressions);
 static LIST_HEAD(nf_tables_objects);
 static LIST_HEAD(nf_tables_flowtables);
+static LIST_HEAD(nf_tables_destroy_list);
+static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
 static u64 table_handle;
 
 enum {
@@ -64,6 +66,8 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state)
 
        net->nft.validate_state = new_validate_state;
 }
+static void nf_tables_trans_destroy_work(struct work_struct *w);
+static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
 
 static void nft_ctx_init(struct nft_ctx *ctx,
                         struct net *net,
@@ -207,6 +211,18 @@ static int nft_delchain(struct nft_ctx *ctx)
        return err;
 }
 
+/* Either expr ops provide both activate/deactivate, or neither.
+ *
+ * Returns true when @ops is NULL or when its ->activate and ->deactivate
+ * callbacks are both set or both unset.  Returns false (after a one-time
+ * kernel warning) when exactly one of the two is set.
+ */
+static bool nft_expr_check_ops(const struct nft_expr_ops *ops)
+{
+       /* Nothing to validate without an ops structure. */
+       if (!ops)
+               return true;
+
+       /* The XOR of the two "is NULL" tests is true only on imbalance. */
+       if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate)))
+               return false;
+
+       return true;
+}
+
 static void nft_rule_expr_activate(const struct nft_ctx *ctx,
                                   struct nft_rule *rule)
 {
@@ -298,7 +314,7 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
        return 0;
 }
 
-static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
                             struct nft_set *set)
 {
        struct nft_trans *trans;
@@ -318,7 +334,7 @@ static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
        return 0;
 }
 
-static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
+static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
 {
        int err;
 
@@ -1005,7 +1021,8 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 
 static void nf_tables_table_destroy(struct nft_ctx *ctx)
 {
-       BUG_ON(ctx->table->use > 0);
+       if (WARN_ON(ctx->table->use > 0))
+               return;
 
        rhltable_destroy(&ctx->table->chains_ht);
        kfree(ctx->table->name);
@@ -1412,7 +1429,8 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
 {
        struct nft_chain *chain = ctx->chain;
 
-       BUG_ON(chain->use > 0);
+       if (WARN_ON(chain->use > 0))
+               return;
 
        /* no concurrent access possible anymore */
        nf_tables_chain_free_chain_rules(chain);
@@ -1907,6 +1925,9 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
  */
 int nft_register_expr(struct nft_expr_type *type)
 {
+       if (!nft_expr_check_ops(type->ops))
+               return -EINVAL;
+
        nfnl_lock(NFNL_SUBSYS_NFTABLES);
        if (type->family == NFPROTO_UNSPEC)
                list_add_tail_rcu(&type->list, &nf_tables_expressions);
@@ -2054,6 +2075,10 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
                        err = PTR_ERR(ops);
                        goto err1;
                }
+               if (!nft_expr_check_ops(ops)) {
+                       err = -EINVAL;
+                       goto err1;
+               }
        } else
                ops = type->ops;
 
@@ -2434,7 +2459,6 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
 {
        struct nft_expr *expr;
 
-       lockdep_assert_held(&ctx->net->nft.commit_mutex);
        /*
         * Careful: some expressions might not be initialized in case this
         * is called on error from nf_tables_newrule().
@@ -3567,13 +3591,6 @@ static void nft_set_destroy(struct nft_set *set)
        kvfree(set);
 }
 
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
-{
-       list_del_rcu(&set->list);
-       nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
-       nft_set_destroy(set);
-}
-
 static int nf_tables_delset(struct net *net, struct sock *nlsk,
                            struct sk_buff *skb, const struct nlmsghdr *nlh,
                            const struct nlattr * const nla[],
@@ -3668,17 +3685,38 @@ bind:
 }
 EXPORT_SYMBOL_GPL(nf_tables_bind_set);
 
-void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set,
                          struct nft_set_binding *binding)
+{
+       if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
+           nft_is_active(ctx->net, set))
+               list_add_tail_rcu(&set->list, &ctx->table->sets);
+
+       list_add_tail_rcu(&binding->list, &set->bindings);
+}
+EXPORT_SYMBOL_GPL(nf_tables_rebind_set);
+
+void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+                         struct nft_set_binding *binding)
 {
        list_del_rcu(&binding->list);
 
        if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
            nft_is_active(ctx->net, set))
-               nf_tables_set_destroy(ctx, set);
+               list_del_rcu(&set->list);
 }
 EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
 
+void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
+{
+       if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
+           nft_is_active(ctx->net, set)) {
+               nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
+               nft_set_destroy(set);
+       }
+}
+EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
+
 const struct nft_set_ext_type nft_set_ext_types[] = {
        [NFT_SET_EXT_KEY]               = {
                .align  = __alignof__(u32),
@@ -6191,19 +6229,28 @@ static void nft_commit_release(struct nft_trans *trans)
                nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
                break;
        }
+
+       if (trans->put_net)
+               put_net(trans->ctx.net);
+
        kfree(trans);
 }
 
-static void nf_tables_commit_release(struct net *net)
+static void nf_tables_trans_destroy_work(struct work_struct *w)
 {
        struct nft_trans *trans, *next;
+       LIST_HEAD(head);
 
-       if (list_empty(&net->nft.commit_list))
+       spin_lock(&nf_tables_destroy_list_lock);
+       list_splice_init(&nf_tables_destroy_list, &head);
+       spin_unlock(&nf_tables_destroy_list_lock);
+
+       if (list_empty(&head))
                return;
 
        synchronize_rcu();
 
-       list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+       list_for_each_entry_safe(trans, next, &head, list) {
                list_del(&trans->list);
                nft_commit_release(trans);
        }
@@ -6334,6 +6381,37 @@ static void nft_chain_del(struct nft_chain *chain)
        list_del_rcu(&chain->list);
 }
 
+static void nf_tables_commit_release(struct net *net)
+{
+       struct nft_trans *trans;
+
+       /* all side effects have to be made visible.
+        * For example, if a chain named 'foo' has been deleted, a
+        * new transaction must not find it anymore.
+        *
+        * Memory reclaim happens asynchronously from work queue
+        * to prevent expensive synchronize_rcu() in commit phase.
+        */
+       if (list_empty(&net->nft.commit_list)) {
+               mutex_unlock(&net->nft.commit_mutex);
+               return;
+       }
+
+       trans = list_last_entry(&net->nft.commit_list,
+                               struct nft_trans, list);
+       get_net(trans->ctx.net);
+       WARN_ON_ONCE(trans->put_net);
+
+       trans->put_net = true;
+       spin_lock(&nf_tables_destroy_list_lock);
+       list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list);
+       spin_unlock(&nf_tables_destroy_list_lock);
+
+       mutex_unlock(&net->nft.commit_mutex);
+
+       schedule_work(&trans_destroy_work);
+}
+
 static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 {
        struct nft_trans *trans, *next;
@@ -6495,9 +6573,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                }
        }
 
-       nf_tables_commit_release(net);
        nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
-       mutex_unlock(&net->nft.commit_mutex);
+       nf_tables_commit_release(net);
 
        return 0;
 }
@@ -7168,7 +7245,8 @@ int __nft_release_basechain(struct nft_ctx *ctx)
 {
        struct nft_rule *rule, *nr;
 
-       BUG_ON(!nft_is_base_chain(ctx->chain));
+       if (WARN_ON(!nft_is_base_chain(ctx->chain)))
+               return 0;
 
        nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
        list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
@@ -7271,6 +7349,7 @@ static int __init nf_tables_module_init(void)
 {
        int err;
 
+       spin_lock_init(&nf_tables_destroy_list_lock);
        err = register_pernet_subsys(&nf_tables_net_ops);
        if (err < 0)
                return err;
@@ -7310,6 +7389,7 @@ static void __exit nf_tables_module_exit(void)
        unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
        nft_chain_filter_fini();
        unregister_pernet_subsys(&nf_tables_net_ops);
+       cancel_work_sync(&trans_destroy_work);
        rcu_barrier();
        nf_tables_core_module_exit();
 }
index ffd5c0f9412b4dab673b3294292a2fa6a7da404b..3fbce3b9c5ec0f51c9841aca36c0e5fc48b233c6 100644 (file)
@@ -249,12 +249,24 @@ static struct nft_expr_type *nft_basic_types[] = {
        &nft_exthdr_type,
 };
 
+static struct nft_object_type *nft_basic_objects[] = {
+#ifdef CONFIG_NETWORK_SECMARK
+       &nft_secmark_obj_type,
+#endif
+};
+
 int __init nf_tables_core_module_init(void)
 {
-       int err, i;
+       int err, i, j = 0;
+
+       for (i = 0; i < ARRAY_SIZE(nft_basic_objects); i++) {
+               err = nft_register_obj(nft_basic_objects[i]);
+               if (err)
+                       goto err;
+       }
 
-       for (i = 0; i < ARRAY_SIZE(nft_basic_types); i++) {
-               err = nft_register_expr(nft_basic_types[i]);
+       for (j = 0; j < ARRAY_SIZE(nft_basic_types); j++) {
+               err = nft_register_expr(nft_basic_types[j]);
                if (err)
                        goto err;
        }
@@ -262,8 +274,12 @@ int __init nf_tables_core_module_init(void)
        return 0;
 
 err:
+       while (j-- > 0)
+               nft_unregister_expr(nft_basic_types[j]);
+
        while (i-- > 0)
-               nft_unregister_expr(nft_basic_types[i]);
+               nft_unregister_obj(nft_basic_objects[i]);
+
        return err;
 }
 
@@ -274,4 +290,8 @@ void nf_tables_core_module_exit(void)
        i = ARRAY_SIZE(nft_basic_types);
        while (i-- > 0)
                nft_unregister_expr(nft_basic_types[i]);
+
+       i = ARRAY_SIZE(nft_basic_objects);
+       while (i-- > 0)
+               nft_unregister_obj(nft_basic_objects[i]);
 }
index a30f8ba4b89ac427053281936ad3e70750e3a2f0..b48545b84ce80de61e7c5b2d356804ed75d928ae 100644 (file)
@@ -53,9 +53,6 @@ ctnl_timeout_parse_policy(void *timeout,
        struct nlattr **tb;
        int ret = 0;
 
-       if (!l4proto->ctnl_timeout.nlattr_to_obj)
-               return 0;
-
        tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
                     GFP_KERNEL);
 
@@ -125,7 +122,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
                return -EBUSY;
        }
 
-       l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+       l4proto = nf_ct_l4proto_find_get(l4num);
 
        /* This protocol is not supportted, skip. */
        if (l4proto->l4proto != l4num) {
@@ -167,6 +164,8 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
        struct nfgenmsg *nfmsg;
        unsigned int flags = portid ? NLM_F_MULTI : 0;
        const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto;
+       struct nlattr *nest_parms;
+       int ret;
 
        event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -186,22 +185,15 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
                         htonl(refcount_read(&timeout->refcnt))))
                goto nla_put_failure;
 
-       if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
-               struct nlattr *nest_parms;
-               int ret;
-
-               nest_parms = nla_nest_start(skb,
-                                           CTA_TIMEOUT_DATA | NLA_F_NESTED);
-               if (!nest_parms)
-                       goto nla_put_failure;
+       nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA | NLA_F_NESTED);
+       if (!nest_parms)
+               goto nla_put_failure;
 
-               ret = l4proto->ctnl_timeout.obj_to_nlattr(skb,
-                                                       &timeout->timeout.data);
-               if (ret < 0)
-                       goto nla_put_failure;
+       ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->timeout.data);
+       if (ret < 0)
+               goto nla_put_failure;
 
-               nla_nest_end(skb, nest_parms);
-       }
+       nla_nest_end(skb, nest_parms);
 
        nlmsg_end(skb, nlh);
        return skb->len;
@@ -369,7 +361,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 
        l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
        l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
-       l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+       l4proto = nf_ct_l4proto_find_get(l4num);
 
        /* This protocol is not supported, skip. */
        if (l4proto->l4proto != l4num) {
@@ -391,12 +383,14 @@ err:
 
 static int
 cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
-                           u32 seq, u32 type, int event,
+                           u32 seq, u32 type, int event, u16 l3num,
                            const struct nf_conntrack_l4proto *l4proto)
 {
        struct nlmsghdr *nlh;
        struct nfgenmsg *nfmsg;
        unsigned int flags = portid ? NLM_F_MULTI : 0;
+       struct nlattr *nest_parms;
+       int ret;
 
        event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -408,25 +402,19 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
        nfmsg->version = NFNETLINK_V0;
        nfmsg->res_id = 0;
 
-       if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) ||
+       if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l3num)) ||
            nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
                goto nla_put_failure;
 
-       if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
-               struct nlattr *nest_parms;
-               int ret;
-
-               nest_parms = nla_nest_start(skb,
-                                           CTA_TIMEOUT_DATA | NLA_F_NESTED);
-               if (!nest_parms)
-                       goto nla_put_failure;
+       nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA | NLA_F_NESTED);
+       if (!nest_parms)
+               goto nla_put_failure;
 
-               ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
-               if (ret < 0)
-                       goto nla_put_failure;
+       ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
+       if (ret < 0)
+               goto nla_put_failure;
 
-               nla_nest_end(skb, nest_parms);
-       }
+       nla_nest_end(skb, nest_parms);
 
        nlmsg_end(skb, nlh);
        return skb->len;
@@ -454,7 +442,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 
        l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
        l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
-       l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+       l4proto = nf_ct_l4proto_find_get(l4num);
 
        /* This protocol is not supported, skip. */
        if (l4proto->l4proto != l4num) {
@@ -472,6 +460,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
                                          nlh->nlmsg_seq,
                                          NFNL_MSG_TYPE(nlh->nlmsg_type),
                                          IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
+                                         l3num,
                                          l4proto);
        if (ret <= 0) {
                kfree_skb(skb2);
index fa90a8402845d1768fce3741e3173a8493268558..79d48c1d06f4dc192e8b8fd9ba68b0dbe8d7864b 100644 (file)
@@ -79,7 +79,8 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 
        err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
                            tb[NFTA_CMP_DATA]);
-       BUG_ON(err < 0);
+       if (err < 0)
+               return err;
 
        priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
        err = nft_validate_register_load(priv->sreg, desc.len);
@@ -129,7 +130,8 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
 
        err = nft_data_init(NULL, &data, sizeof(data), &desc,
                            tb[NFTA_CMP_DATA]);
-       BUG_ON(err < 0);
+       if (err < 0)
+               return err;
 
        priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
        err = nft_validate_register_load(priv->sreg, desc.len);
index 5dd87748afa8a9185de95c439881f72d3f96e798..586627c361dfcf8026505d1bff3b5287b2e3e96f 100644 (file)
@@ -279,7 +279,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
 {
        const struct nft_ct *priv = nft_expr_priv(expr);
        struct sk_buff *skb = pkt->skb;
-#ifdef CONFIG_NF_CONNTRACK_MARK
+#if defined(CONFIG_NF_CONNTRACK_MARK) || defined(CONFIG_NF_CONNTRACK_SECMARK)
        u32 value = regs->data[priv->sreg];
 #endif
        enum ip_conntrack_info ctinfo;
@@ -298,6 +298,14 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
                }
                break;
 #endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+       case NFT_CT_SECMARK:
+               if (ct->secmark != value) {
+                       ct->secmark = value;
+                       nf_conntrack_event_cache(IPCT_SECMARK, ct);
+               }
+               break;
+#endif
 #ifdef CONFIG_NF_CONNTRACK_LABELS
        case NFT_CT_LABELS:
                nf_connlabels_replace(ct,
@@ -564,6 +572,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
                        return -EINVAL;
                len = sizeof(u32);
                break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+       case NFT_CT_SECMARK:
+               if (tb[NFTA_CT_DIRECTION])
+                       return -EINVAL;
+               len = sizeof(u32);
+               break;
 #endif
        default:
                return -EOPNOTSUPP;
@@ -776,9 +791,6 @@ nft_ct_timeout_parse_policy(void *timeouts,
        struct nlattr **tb;
        int ret = 0;
 
-       if (!l4proto->ctnl_timeout.nlattr_to_obj)
-               return 0;
-
        tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
                     GFP_KERNEL);
 
@@ -858,7 +870,7 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
        l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
        priv->l4proto = l4num;
 
-       l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+       l4proto = nf_ct_l4proto_find_get(l4num);
 
        if (l4proto->l4proto != l4num) {
                ret = -EOPNOTSUPP;
index 6e91a37d57f2736a128cefe886422a15d1cee851..07d4efd3d85182997edb4ae0a1fd74d88221a07f 100644 (file)
@@ -235,14 +235,31 @@ err1:
        return err;
 }
 
+static void nft_dynset_activate(const struct nft_ctx *ctx,
+                               const struct nft_expr *expr)
+{
+       struct nft_dynset *priv = nft_expr_priv(expr);
+
+       nf_tables_rebind_set(ctx, priv->set, &priv->binding);
+}
+
+static void nft_dynset_deactivate(const struct nft_ctx *ctx,
+                                 const struct nft_expr *expr)
+{
+       struct nft_dynset *priv = nft_expr_priv(expr);
+
+       nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+}
+
 static void nft_dynset_destroy(const struct nft_ctx *ctx,
                               const struct nft_expr *expr)
 {
        struct nft_dynset *priv = nft_expr_priv(expr);
 
-       nf_tables_unbind_set(ctx, priv->set, &priv->binding);
        if (priv->expr != NULL)
                nft_expr_destroy(ctx, priv->expr);
+
+       nf_tables_destroy_set(ctx, priv->set);
 }
 
 static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -279,6 +296,8 @@ static const struct nft_expr_ops nft_dynset_ops = {
        .eval           = nft_dynset_eval,
        .init           = nft_dynset_init,
        .destroy        = nft_dynset_destroy,
+       .activate       = nft_dynset_activate,
+       .deactivate     = nft_dynset_deactivate,
        .dump           = nft_dynset_dump,
 };
 
index ad13e8643599722a2eb0376957e126f5b192385d..227b2b15a19cbd979df780b3660e2395b689c5db 100644 (file)
@@ -121,12 +121,28 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
        return 0;
 }
 
+static void nft_lookup_activate(const struct nft_ctx *ctx,
+                               const struct nft_expr *expr)
+{
+       struct nft_lookup *priv = nft_expr_priv(expr);
+
+       nf_tables_rebind_set(ctx, priv->set, &priv->binding);
+}
+
+static void nft_lookup_deactivate(const struct nft_ctx *ctx,
+                                 const struct nft_expr *expr)
+{
+       struct nft_lookup *priv = nft_expr_priv(expr);
+
+       nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+}
+
 static void nft_lookup_destroy(const struct nft_ctx *ctx,
                               const struct nft_expr *expr)
 {
        struct nft_lookup *priv = nft_expr_priv(expr);
 
-       nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+       nf_tables_destroy_set(ctx, priv->set);
 }
 
 static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -209,6 +225,8 @@ static const struct nft_expr_ops nft_lookup_ops = {
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
        .eval           = nft_lookup_eval,
        .init           = nft_lookup_init,
+       .activate       = nft_lookup_activate,
+       .deactivate     = nft_lookup_deactivate,
        .destroy        = nft_lookup_destroy,
        .dump           = nft_lookup_dump,
        .validate       = nft_lookup_validate,
index 297fe7d97c182ffbcbfb15c94f10bdec633f1149..6180626c3f80b9069e7af752f568a282e157cc3e 100644 (file)
@@ -284,6 +284,11 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
 
                skb->nf_trace = !!value8;
                break;
+#ifdef CONFIG_NETWORK_SECMARK
+       case NFT_META_SECMARK:
+               skb->secmark = value;
+               break;
+#endif
        default:
                WARN_ON(1);
        }
@@ -436,6 +441,9 @@ static int nft_meta_set_init(const struct nft_ctx *ctx,
        switch (priv->key) {
        case NFT_META_MARK:
        case NFT_META_PRIORITY:
+#ifdef CONFIG_NETWORK_SECMARK
+       case NFT_META_SECMARK:
+#endif
                len = sizeof(u32);
                break;
        case NFT_META_NFTRACE:
@@ -543,3 +551,111 @@ struct nft_expr_type nft_meta_type __read_mostly = {
        .maxattr        = NFTA_META_MAX,
        .owner          = THIS_MODULE,
 };
+
+#ifdef CONFIG_NETWORK_SECMARK
+struct nft_secmark {
+       u32 secid;
+       char *ctx;
+};
+
+static const struct nla_policy nft_secmark_policy[NFTA_SECMARK_MAX + 1] = {
+       [NFTA_SECMARK_CTX]     = { .type = NLA_STRING, .len = NFT_SECMARK_CTX_MAXLEN },
+};
+
+static int nft_secmark_compute_secid(struct nft_secmark *priv)
+{
+       u32 tmp_secid = 0;
+       int err;
+
+       err = security_secctx_to_secid(priv->ctx, strlen(priv->ctx), &tmp_secid);
+       if (err)
+               return err;
+
+       if (!tmp_secid)
+               return -ENOENT;
+
+       err = security_secmark_relabel_packet(tmp_secid);
+       if (err)
+               return err;
+
+       priv->secid = tmp_secid;
+       return 0;
+}
+
+static void nft_secmark_obj_eval(struct nft_object *obj, struct nft_regs *regs,
+                                const struct nft_pktinfo *pkt)
+{
+       const struct nft_secmark *priv = nft_obj_data(obj);
+       struct sk_buff *skb = pkt->skb;
+
+       skb->secmark = priv->secid;
+}
+
+static int nft_secmark_obj_init(const struct nft_ctx *ctx,
+                               const struct nlattr * const tb[],
+                               struct nft_object *obj)
+{
+       struct nft_secmark *priv = nft_obj_data(obj);
+       int err;
+
+       if (tb[NFTA_SECMARK_CTX] == NULL)
+               return -EINVAL;
+
+       priv->ctx = nla_strdup(tb[NFTA_SECMARK_CTX], GFP_KERNEL);
+       if (!priv->ctx)
+               return -ENOMEM;
+
+       err = nft_secmark_compute_secid(priv);
+       if (err) {
+               kfree(priv->ctx);
+               return err;
+       }
+
+       security_secmark_refcount_inc();
+
+       return 0;
+}
+
+static int nft_secmark_obj_dump(struct sk_buff *skb, struct nft_object *obj,
+                               bool reset)
+{
+       struct nft_secmark *priv = nft_obj_data(obj);
+       int err;
+
+       if (nla_put_string(skb, NFTA_SECMARK_CTX, priv->ctx))
+               return -1;
+
+       if (reset) {
+               err = nft_secmark_compute_secid(priv);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static void nft_secmark_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
+{
+       struct nft_secmark *priv = nft_obj_data(obj);
+
+       security_secmark_refcount_dec();
+
+       kfree(priv->ctx);
+}
+
+static const struct nft_object_ops nft_secmark_obj_ops = {
+       .type           = &nft_secmark_obj_type,
+       .size           = sizeof(struct nft_secmark),
+       .init           = nft_secmark_obj_init,
+       .eval           = nft_secmark_obj_eval,
+       .dump           = nft_secmark_obj_dump,
+       .destroy        = nft_secmark_obj_destroy,
+};
+struct nft_object_type nft_secmark_obj_type __read_mostly = {
+       .type           = NFT_OBJECT_SECMARK,
+       .ops            = &nft_secmark_obj_ops,
+       .maxattr        = NFTA_SECMARK_MAX,
+       .policy         = nft_secmark_policy,
+       .owner          = THIS_MODULE,
+};
+#endif /* CONFIG_NETWORK_SECMARK */
index cdf348f751eca0c22018d99954d98aac66499d8d..a3185ca2a3a985712f5b2262df3f9e28af6fab4e 100644 (file)
@@ -155,12 +155,28 @@ nla_put_failure:
        return -1;
 }
 
+static void nft_objref_map_activate(const struct nft_ctx *ctx,
+                                   const struct nft_expr *expr)
+{
+       struct nft_objref_map *priv = nft_expr_priv(expr);
+
+       nf_tables_rebind_set(ctx, priv->set, &priv->binding);
+}
+
+static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
+                                     const struct nft_expr *expr)
+{
+       struct nft_objref_map *priv = nft_expr_priv(expr);
+
+       nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+}
+
 static void nft_objref_map_destroy(const struct nft_ctx *ctx,
                                   const struct nft_expr *expr)
 {
        struct nft_objref_map *priv = nft_expr_priv(expr);
 
-       nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+       nf_tables_destroy_set(ctx, priv->set);
 }
 
 static struct nft_expr_type nft_objref_type;
@@ -169,6 +185,8 @@ static const struct nft_expr_ops nft_objref_map_ops = {
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
        .eval           = nft_objref_map_eval,
        .init           = nft_objref_map_init,
+       .activate       = nft_objref_map_activate,
+       .deactivate     = nft_objref_map_deactivate,
        .destroy        = nft_objref_map_destroy,
        .dump           = nft_objref_map_dump,
 };
index 29f5bd2377b0deaf7ede8ec0573bf71cfeef7478..b48e58cceeb72f9635263ac6985da98af48cbbf7 100644 (file)
@@ -94,7 +94,8 @@ static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = {
 
 int nft_reject_icmp_code(u8 code)
 {
-       BUG_ON(code > NFT_REJECT_ICMPX_MAX);
+       if (WARN_ON_ONCE(code > NFT_REJECT_ICMPX_MAX))
+               return ICMP_NET_UNREACH;
 
        return icmp_code_v4[code];
 }
@@ -111,7 +112,8 @@ static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = {
 
 int nft_reject_icmpv6_code(u8 code)
 {
-       BUG_ON(code > NFT_REJECT_ICMPX_MAX);
+       if (WARN_ON_ONCE(code > NFT_REJECT_ICMPX_MAX))
+               return ICMPV6_NOROUTE;
 
        return icmp_code_v6[code];
 }
index 76dba9f6b6f627de7de1ada08320cc2ed5a12b24..f35fa33913ae4d085c0d63fc7602f8f873d516f8 100644 (file)
@@ -90,6 +90,11 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
        case NFT_RT_TCPMSS:
                nft_reg_store16(dest, get_tcpmss(pkt, dst));
                break;
+#ifdef CONFIG_XFRM
+       case NFT_RT_XFRM:
+               nft_reg_store8(dest, !!dst->xfrm);
+               break;
+#endif
        default:
                WARN_ON(1);
                goto err;
@@ -130,6 +135,11 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
        case NFT_RT_TCPMSS:
                len = sizeof(u16);
                break;
+#ifdef CONFIG_XFRM
+       case NFT_RT_XFRM:
+               len = sizeof(u8);
+               break;
+#endif
        default:
                return -EOPNOTSUPP;
        }
@@ -164,6 +174,7 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
        case NFT_RT_NEXTHOP4:
        case NFT_RT_NEXTHOP6:
        case NFT_RT_CLASSID:
+       case NFT_RT_XFRM:
                return 0;
        case NFT_RT_TCPMSS:
                hooks = (1 << NF_INET_FORWARD) |
index 015124e649cbdf0fb658515cf7c7e2dbf550a972..339a9dd1c83210ec800f043c59eb68e4d6659d95 100644 (file)
@@ -88,7 +88,7 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
                .key     = key,
        };
 
-       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+       he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
        if (he != NULL)
                *ext = &he->ext;
 
@@ -106,7 +106,7 @@ static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
                .key     = elem->key.val.data,
        };
 
-       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+       he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
        if (he != NULL)
                return he;
 
@@ -129,7 +129,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
                .key     = key,
        };
 
-       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+       he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
        if (he != NULL)
                goto out;
 
@@ -217,7 +217,7 @@ static void *nft_rhash_deactivate(const struct net *net,
        };
 
        rcu_read_lock();
-       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+       he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
        if (he != NULL &&
            !nft_rhash_flush(net, set, he))
                he = NULL;
@@ -244,21 +244,15 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
        struct nft_rhash_elem *he;
        struct rhashtable_iter hti;
        struct nft_set_elem elem;
-       int err;
-
-       err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC);
-       iter->err = err;
-       if (err)
-               return;
 
+       rhashtable_walk_enter(&priv->ht, &hti);
        rhashtable_walk_start(&hti);
 
        while ((he = rhashtable_walk_next(&hti))) {
                if (IS_ERR(he)) {
-                       err = PTR_ERR(he);
-                       if (err != -EAGAIN) {
-                               iter->err = err;
-                               goto out;
+                       if (PTR_ERR(he) != -EAGAIN) {
+                               iter->err = PTR_ERR(he);
+                               break;
                        }
 
                        continue;
@@ -275,13 +269,11 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 
                iter->err = iter->fn(ctx, set, iter, &elem);
                if (iter->err < 0)
-                       goto out;
+                       break;
 
 cont:
                iter->count++;
        }
-
-out:
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);
 }
@@ -293,21 +285,17 @@ static void nft_rhash_gc(struct work_struct *work)
        struct nft_rhash *priv;
        struct nft_set_gc_batch *gcb = NULL;
        struct rhashtable_iter hti;
-       int err;
 
        priv = container_of(work, struct nft_rhash, gc_work.work);
        set  = nft_set_container_of(priv);
 
-       err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
-       if (err)
-               goto schedule;
-
+       rhashtable_walk_enter(&priv->ht, &hti);
        rhashtable_walk_start(&hti);
 
        while ((he = rhashtable_walk_next(&hti))) {
                if (IS_ERR(he)) {
                        if (PTR_ERR(he) != -EAGAIN)
-                               goto out;
+                               break;
                        continue;
                }
 
@@ -326,17 +314,15 @@ gc:
 
                gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
                if (gcb == NULL)
-                       goto out;
+                       break;
                rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
                atomic_dec(&set->nelems);
                nft_set_gc_batch_add(gcb, he);
        }
-out:
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);
 
        nft_set_gc_batch_complete(gcb);
-schedule:
        queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
                           nft_set_gc_interval(set));
 }
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
new file mode 100644 (file)
index 0000000..3cf71a2
--- /dev/null
@@ -0,0 +1,293 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Generic part shared by ipv4 and ipv6 backends.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <linux/in.h>
+#include <net/xfrm.h>
+
+/* Netlink attribute policy of the "xfrm" expression: the wire type expected
+ * for each NFTA_XFRM_* attribute.  Installed through nft_xfrm_type.policy.
+ */
+static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = {
+       [NFTA_XFRM_KEY]         = { .type = NLA_U32 },
+       [NFTA_XFRM_DIR]         = { .type = NLA_U8 },
+       [NFTA_XFRM_SPNUM]       = { .type = NLA_U32 },
+       [NFTA_XFRM_DREG]        = { .type = NLA_U32 },
+};
+
+/* Private data of one "xfrm" expression instance, filled in by
+ * nft_xfrm_get_init() and read on the packet path.
+ */
+struct nft_xfrm {
+       /* which property of the xfrm state to load (NFT_XFRM_KEY_*) */
+       enum nft_xfrm_keys      key:8;
+       /* destination register the value is stored in */
+       enum nft_registers      dreg:8;
+       /* XFRM_POLICY_IN or XFRM_POLICY_OUT */
+       u8                      dir;
+       /* index of the state to inspect: secpath slot for the input
+        * direction, depth in the dst chain for the output direction
+        */
+       u8                      spnum;
+};
+
+/* Parse and validate the netlink attributes of a new "xfrm" expression.
+ *
+ * NFTA_XFRM_KEY, NFTA_XFRM_DIR and NFTA_XFRM_DREG are mandatory;
+ * NFTA_XFRM_SPNUM is optional and defaults to 0.
+ *
+ * Returns the result of nft_validate_register_store() on success,
+ * -EOPNOTSUPP for families other than ipv4/ipv6/inet, -EINVAL for a
+ * missing attribute or an unknown key/direction, and -ERANGE when spnum
+ * is not below XFRM_MAX_DEPTH.
+ */
+static int nft_xfrm_get_init(const struct nft_ctx *ctx,
+                            const struct nft_expr *expr,
+                            const struct nlattr * const tb[])
+{
+       struct nft_xfrm *priv = nft_expr_priv(expr);
+       unsigned int len = 0;
+       u32 spnum = 0;
+       u32 key;
+       u8 dir;
+
+       if (!tb[NFTA_XFRM_KEY] || !tb[NFTA_XFRM_DIR] || !tb[NFTA_XFRM_DREG])
+               return -EINVAL;
+
+       switch (ctx->family) {
+       case NFPROTO_IPV4:
+       case NFPROTO_IPV6:
+       case NFPROTO_INET:
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       /* Validate the full 32-bit value before narrowing it into the 8-bit
+        * priv->key bitfield; assigning first would truncate an out-of-range
+        * value and let it pass as a valid key.  The attribute is big endian
+        * on the wire, hence the be32 accessor (as for NFTA_XFRM_SPNUM).
+        */
+       key = ntohl(nla_get_be32(tb[NFTA_XFRM_KEY]));
+       switch (key) {
+       case NFT_XFRM_KEY_REQID:
+       case NFT_XFRM_KEY_SPI:
+               len = sizeof(u32);
+               break;
+       case NFT_XFRM_KEY_DADDR_IP4:
+       case NFT_XFRM_KEY_SADDR_IP4:
+               len = sizeof(struct in_addr);
+               break;
+       case NFT_XFRM_KEY_DADDR_IP6:
+       case NFT_XFRM_KEY_SADDR_IP6:
+               len = sizeof(struct in6_addr);
+               break;
+       default:
+               return -EINVAL;
+       }
+       priv->key = key;
+
+       dir = nla_get_u8(tb[NFTA_XFRM_DIR]);
+       switch (dir) {
+       case XFRM_POLICY_IN:
+       case XFRM_POLICY_OUT:
+               priv->dir = dir;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (tb[NFTA_XFRM_SPNUM])
+               spnum = ntohl(nla_get_be32(tb[NFTA_XFRM_SPNUM]));
+
+       /* range-check the u32 before it is stored in the u8 field */
+       if (spnum >= XFRM_MAX_DEPTH)
+               return -ERANGE;
+
+       priv->spnum = spnum;
+
+       /* len is the number of bytes the selected key writes to dreg */
+       priv->dreg = nft_parse_register(tb[NFTA_XFRM_DREG]);
+       return nft_validate_register_store(ctx, priv->dreg, NULL,
+                                          NFT_DATA_VALUE, len);
+}
+
+/* Decide whether key @k may be read from an xfrm state.
+ *
+ * Non-address keys are always acceptable.  Address keys additionally
+ * need @family to match the address family of the key and @mode to be
+ * one of the modes that carry a valid address (BEET, TUNNEL).
+ */
+static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode)
+{
+       u8 wanted;
+
+       switch (k) {
+       case NFT_XFRM_KEY_DADDR_IP4:
+       case NFT_XFRM_KEY_SADDR_IP4:
+               wanted = NFPROTO_IPV4;
+               break;
+       case NFT_XFRM_KEY_DADDR_IP6:
+       case NFT_XFRM_KEY_SADDR_IP6:
+               wanted = NFPROTO_IPV6;
+               break;
+       default:
+               return true;
+       }
+
+       if (family != wanted)
+               return false;
+
+       return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL;
+}
+
+/* Copy the property selected by priv->key from @state into the
+ * destination register priv->dreg.
+ *
+ * The verdict is set to NFT_BREAK, and the register left untouched, when
+ * xfrm_state_addr_ok() rejects the key for this @family / state mode or
+ * when the key is one of the placeholder enum values.
+ */
+static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
+                                  struct nft_regs *regs,
+                                  const struct xfrm_state *state,
+                                  u8 family)
+{
+       u32 *dest = &regs->data[priv->dreg];
+
+       if (!xfrm_state_addr_ok(priv->key, family, state->props.mode)) {
+               regs->verdict.code = NFT_BREAK;
+               return;
+       }
+
+       /* Every valid key returns from inside the switch; only the
+        * placeholder values break out and reach the NFT_BREAK below.
+        */
+       switch (priv->key) {
+       case NFT_XFRM_KEY_UNSPEC:
+       case __NFT_XFRM_KEY_MAX:
+               WARN_ON_ONCE(1);
+               break;
+       case NFT_XFRM_KEY_DADDR_IP4:
+               *dest = state->id.daddr.a4;
+               return;
+       case NFT_XFRM_KEY_DADDR_IP6:
+               memcpy(dest, &state->id.daddr.in6, sizeof(struct in6_addr));
+               return;
+       case NFT_XFRM_KEY_SADDR_IP4:
+               *dest = state->props.saddr.a4;
+               return;
+       case NFT_XFRM_KEY_SADDR_IP6:
+               memcpy(dest, &state->props.saddr.in6, sizeof(struct in6_addr));
+               return;
+       case NFT_XFRM_KEY_REQID:
+               *dest = state->props.reqid;
+               return;
+       case NFT_XFRM_KEY_SPI:
+               *dest = state->id.spi;
+               return;
+       }
+
+       regs->verdict.code = NFT_BREAK;
+}
+
+/* Input direction: take the xfrm state from the secpath of the packet.
+ *
+ * priv->spnum indexes sp->xvec[]; the verdict becomes NFT_BREAK when the
+ * packet has no secpath or the secpath holds too few entries.
+ */
+static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv,
+                                   struct nft_regs *regs,
+                                   const struct nft_pktinfo *pkt)
+{
+       const struct sec_path *sp = pkt->skb->sp;
+
+       if (sp && priv->spnum < sp->len) {
+               nft_xfrm_state_get_key(priv, regs, sp->xvec[priv->spnum],
+                                      nft_pf(pkt));
+               return;
+       }
+
+       regs->verdict.code = NFT_BREAK;
+}
+
+/* Output direction: walk the chain of xfrm dst entries attached to the
+ * packet and read the key from the entry at depth priv->spnum.
+ *
+ * The verdict becomes NFT_BREAK when the chain ends (no dst, or a dst
+ * without xfrm state) before that depth is reached.
+ */
+static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv,
+                                 struct nft_regs *regs,
+                                 const struct nft_pktinfo *pkt)
+{
+       const struct dst_entry *cur = skb_dst(pkt->skb);
+       int depth = 0;
+
+       while (cur && cur->xfrm) {
+               if (depth >= priv->spnum) {
+                       nft_xfrm_state_get_key(priv, regs, cur->xfrm,
+                                              nft_pf(pkt));
+                       return;
+               }
+
+               cur = ((const struct xfrm_dst *)cur)->child;
+               depth++;
+       }
+
+       regs->verdict.code = NFT_BREAK;
+}
+
+/* Expression eval callback: dispatch on the configured direction.
+ * Any direction other than IN/OUT triggers a one-time warning and
+ * sets the verdict to NFT_BREAK.
+ */
+static void nft_xfrm_get_eval(const struct nft_expr *expr,
+                             struct nft_regs *regs,
+                             const struct nft_pktinfo *pkt)
+{
+       const struct nft_xfrm *priv = nft_expr_priv(expr);
+       const u8 dir = priv->dir;
+
+       if (dir == XFRM_POLICY_IN) {
+               nft_xfrm_get_eval_in(priv, regs, pkt);
+       } else if (dir == XFRM_POLICY_OUT) {
+               nft_xfrm_get_eval_out(priv, regs, pkt);
+       } else {
+               WARN_ON_ONCE(1);
+               regs->verdict.code = NFT_BREAK;
+       }
+}
+
+/* Dump the expression configuration to @skb as netlink attributes, in
+ * the order DREG, KEY, DIR, SPNUM.  Returns 0 on success, -1 as soon as
+ * one attribute cannot be added.
+ */
+static int nft_xfrm_get_dump(struct sk_buff *skb,
+                            const struct nft_expr *expr)
+{
+       const struct nft_xfrm *priv = nft_expr_priv(expr);
+
+       /* || short-circuits, so nothing is emitted after the first failure */
+       if (nft_dump_register(skb, NFTA_XFRM_DREG, priv->dreg) ||
+           nla_put_be32(skb, NFTA_XFRM_KEY, htonl(priv->key)) ||
+           nla_put_u8(skb, NFTA_XFRM_DIR, priv->dir) ||
+           nla_put_be32(skb, NFTA_XFRM_SPNUM, htonl(priv->spnum)))
+               return -1;
+
+       return 0;
+}
+
+/* Restrict the expression to the hooks that make sense for its
+ * direction: FORWARD is allowed for both, plus LOCAL_IN/PRE_ROUTING for
+ * the input direction or LOCAL_OUT/POST_ROUTING for the output one.
+ * Returns -EINVAL (after a one-time warning) for an unknown direction,
+ * otherwise the result of nft_chain_validate_hooks().
+ */
+static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                            const struct nft_data **data)
+{
+       const struct nft_xfrm *priv = nft_expr_priv(expr);
+       unsigned int hooks = 1 << NF_INET_FORWARD;
+
+       if (priv->dir == XFRM_POLICY_IN) {
+               hooks |= (1 << NF_INET_LOCAL_IN) |
+                        (1 << NF_INET_PRE_ROUTING);
+       } else if (priv->dir == XFRM_POLICY_OUT) {
+               hooks |= (1 << NF_INET_LOCAL_OUT) |
+                        (1 << NF_INET_POST_ROUTING);
+       } else {
+               WARN_ON_ONCE(1);
+               return -EINVAL;
+       }
+
+       return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
+
+/* Forward declaration: the ops table and the expression type point at
+ * each other.
+ */
+static struct nft_expr_type nft_xfrm_type;
+/* Callbacks of the "xfrm" expression; the private area of each instance
+ * is sized for struct nft_xfrm.
+ */
+static const struct nft_expr_ops nft_xfrm_get_ops = {
+       .type           = &nft_xfrm_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_xfrm)),
+       .eval           = nft_xfrm_get_eval,
+       .init           = nft_xfrm_get_init,
+       .dump           = nft_xfrm_get_dump,
+       .validate       = nft_xfrm_validate,
+};
+
+/* Expression type registered under the name "xfrm"; ties together the
+ * ops table and the netlink attribute policy.
+ */
+static struct nft_expr_type nft_xfrm_type __read_mostly = {
+       .name           = "xfrm",
+       .ops            = &nft_xfrm_get_ops,
+       .policy         = nft_xfrm_policy,
+       .maxattr        = NFTA_XFRM_MAX,
+       .owner          = THIS_MODULE,
+};
+
+/* Module entry point: register the "xfrm" expression type and return
+ * the result of the registration.
+ */
+static int __init nft_xfrm_module_init(void)
+{
+       return nft_register_expr(&nft_xfrm_type);
+}
+
+/* Module exit point: unregister the "xfrm" expression type. */
+static void __exit nft_xfrm_module_exit(void)
+{
+       nft_unregister_expr(&nft_xfrm_type);
+}
+
+module_init(nft_xfrm_module_init);
+module_exit(nft_xfrm_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("nf_tables: xfrm/IPSec matching");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_AUTHOR("Máté Eckl <ecklm94@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("xfrm");
index 89457efd2e008261b549dff8fe73d0ec8abaad02..2c7a4b80206f50cfca179f3c5a731bf70091938b 100644 (file)
@@ -159,7 +159,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
        /* Make sure the timeout policy matches any existing protocol tracker,
         * otherwise default to generic.
         */
-       l4proto = __nf_ct_l4proto_find(par->family, proto);
+       l4proto = __nf_ct_l4proto_find(proto);
        if (timeout->l4proto->l4proto != l4proto->l4proto) {
                ret = -EINVAL;
                pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
index 5ee85919378348367a3b32b710b4e3b6e5590574..c6acfc2d9c8414d36173e3cf09f94ea64f0d7515 100644 (file)
@@ -68,8 +68,6 @@ struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
 {
        struct idletimer_tg *entry;
 
-       BUG_ON(!label);
-
        list_for_each_entry(entry, &idletimer_tg_list, entry) {
                if (!strcmp(label, entry->attr.attr.name))
                        return entry;
@@ -172,8 +170,6 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb,
        pr_debug("resetting timer %s, timeout period %u\n",
                 info->label, info->timeout);
 
-       BUG_ON(!info->timer);
-
        mod_timer(&info->timer->timer,
                  msecs_to_jiffies(info->timeout * 1000) + jiffies);
 
index 4ad5fe27e08bcc6732f8f8a9977530b908430578..f16202d26c205a37eb5d456f697152903aa23343 100644 (file)
@@ -35,8 +35,6 @@ secmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
        u32 secmark = 0;
        const struct xt_secmark_target_info *info = par->targinfo;
 
-       BUG_ON(info->mode != mode);
-
        switch (mode) {
        case SECMARK_MODE_SEL:
                secmark = info->secid;
index 5d92e178198088b85d040473f909aa9eab78c18e..5cb1ecb29ea4d5c4f5df9fc325aac8e81d0bde58 100644 (file)
@@ -68,6 +68,38 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
        return 0;
 }
 
+/* checkentry for revision 2 of the cgroup match.
+ *
+ * Validates the user-supplied match info: the invert flags must be 0 or
+ * 1 and exactly one of path / classid must be given.  For a path match
+ * the path is resolved and the resulting cgroup is stored in info->priv
+ * (released again by cgroup_mt_destroy_v2()); otherwise info->priv is
+ * left NULL.
+ *
+ * Returns 0 on success, -EINVAL on any validation or lookup failure.
+ */
+static int cgroup_mt_check_v2(const struct xt_mtchk_param *par)
+{
+       struct xt_cgroup_info_v2 *info = par->matchinfo;
+       struct cgroup *cgrp;
+
+       if ((info->invert_path & ~1) || (info->invert_classid & ~1))
+               return -EINVAL;
+
+       /* Rule loading is user-triggerable, so every rejection message in
+        * this function is rate limited.
+        */
+       if (!info->has_path && !info->has_classid) {
+               pr_info_ratelimited("xt_cgroup: no path or classid specified\n");
+               return -EINVAL;
+       }
+
+       if (info->has_path && info->has_classid) {
+               pr_info_ratelimited("path and classid specified\n");
+               return -EINVAL;
+       }
+
+       info->priv = NULL;
+       if (info->has_path) {
+               /* NOTE(review): info->path comes from userspace; confirm it
+                * is guaranteed to be NUL-terminated before this lookup.
+                */
+               cgrp = cgroup_get_from_path(info->path);
+               if (IS_ERR(cgrp)) {
+                       pr_info_ratelimited("invalid path, errno=%ld\n",
+                                           PTR_ERR(cgrp));
+                       return -EINVAL;
+               }
+               info->priv = cgrp;
+       }
+
+       return 0;
+}
+
 static bool
 cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
@@ -99,6 +131,24 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
                        info->invert_classid;
 }
 
+/* Match callback for revision 2 of the cgroup match.
+ *
+ * Packets without a full socket, or whose socket lives in a different
+ * network namespace than the rule, never match.  When checkentry
+ * resolved a path (info->priv set) the socket's cgroup must descend
+ * from it; otherwise the socket's classid is compared.  The respective
+ * invert flag is XORed into the result.
+ */
+static bool cgroup_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+       const struct xt_cgroup_info_v2 *info = par->matchinfo;
+       struct cgroup *ancestor = info->priv;
+       struct sock *sk = skb->sk;
+       struct sock_cgroup_data *skcd;
+
+       if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
+               return false;
+
+       /* take the member address only once sk is known to be non-NULL */
+       skcd = &sk->sk_cgrp_data;
+
+       if (ancestor)
+               return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
+                       info->invert_path;
+
+       return (info->classid == sock_cgroup_classid(skcd)) ^
+               info->invert_classid;
+}
+
 static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
 {
        struct xt_cgroup_info_v1 *info = par->matchinfo;
@@ -107,6 +157,14 @@ static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
                cgroup_put(info->priv);
 }
 
+/* Destructor for revision 2: drop the cgroup stored in info->priv, if
+ * there is one.
+ */
+static void cgroup_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+       struct xt_cgroup_info_v2 *info = par->matchinfo;
+       struct cgroup *cgrp = info->priv;
+
+       if (!cgrp)
+               return;
+
+       cgroup_put(cgrp);
+}
+
 static struct xt_match cgroup_mt_reg[] __read_mostly = {
        {
                .name           = "cgroup",
@@ -134,6 +192,20 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = {
                                  (1 << NF_INET_POST_ROUTING) |
                                  (1 << NF_INET_LOCAL_IN),
        },
+       {
+               .name           = "cgroup",
+               .revision       = 2,
+               .family         = NFPROTO_UNSPEC,
+               .checkentry     = cgroup_mt_check_v2,
+               .match          = cgroup_mt_v2,
+               .matchsize      = sizeof(struct xt_cgroup_info_v2),
+               .usersize       = offsetof(struct xt_cgroup_info_v2, priv),
+               .destroy        = cgroup_mt_destroy_v2,
+               .me             = THIS_MODULE,
+               .hooks          = (1 << NF_INET_LOCAL_OUT) |
+                                 (1 << NF_INET_POST_ROUTING) |
+                                 (1 << NF_INET_LOCAL_IN),
+       },
 };
 
 static int __init cgroup_mt_init(void)
index 10d61a6eed712442c14cc1011341c2dff5890c2d..fceae245eb0367f2b950a3117851c7c7a162eb7e 100644 (file)
 #include <linux/netfilter/xt_quota.h>
 #include <linux/module.h>
 
-struct xt_quota_priv {
-       spinlock_t      lock;
-       uint64_t        quota;
-};
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
 MODULE_DESCRIPTION("Xtables: countdown quota match");
@@ -26,54 +21,48 @@ static bool
 quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
        struct xt_quota_info *q = (void *)par->matchinfo;
-       struct xt_quota_priv *priv = q->master;
+       u64 current_count = atomic64_read(&q->counter);
        bool ret = q->flags & XT_QUOTA_INVERT;
-
-       spin_lock_bh(&priv->lock);
-       if (priv->quota >= skb->len) {
-               priv->quota -= skb->len;
-               ret = !ret;
-       } else {
-               /* we do not allow even small packets from now on */
-               priv->quota = 0;
-       }
-       spin_unlock_bh(&priv->lock);
-
-       return ret;
+       u64 old_count, new_count;
+
+       do {
+               if (current_count == 1)
+                       return ret;
+               if (current_count <= skb->len) {
+                       atomic64_set(&q->counter, 1);
+                       return ret;
+               }
+               old_count = current_count;
+               new_count = current_count - skb->len;
+               current_count = atomic64_cmpxchg(&q->counter, old_count,
+                                                new_count);
+       } while (current_count != old_count);
+       return !ret;
 }
 
+/* checkentry: reject unknown flags and a counter larger than quota + 1,
+ * then seed a still-zero counter with quota + 1 (quota_mt() treats a
+ * counter value of 1 as "quota exhausted").
+ *
+ * NOTE(review): "q->quota + 1" wraps to 0 when quota holds the maximum
+ * value of its type -- confirm whether that input has to be rejected.
+ */
 static int quota_mt_check(const struct xt_mtchk_param *par)
 {
        struct xt_quota_info *q = par->matchinfo;
 
+       BUILD_BUG_ON(sizeof(atomic64_t) != sizeof(__u64));
+
        if (q->flags & ~XT_QUOTA_MASK)
                return -EINVAL;
+       if (atomic64_read(&q->counter) > q->quota + 1)
+               return -ERANGE;
 
-       q->master = kmalloc(sizeof(*q->master), GFP_KERNEL);
-       if (q->master == NULL)
-               return -ENOMEM;
-
-       spin_lock_init(&q->master->lock);
-       q->master->quota = q->quota;
+       if (atomic64_read(&q->counter) == 0)
+               atomic64_set(&q->counter, q->quota + 1);
        return 0;
 }
 
-static void quota_mt_destroy(const struct xt_mtdtor_param *par)
-{
-       const struct xt_quota_info *q = par->matchinfo;
-
-       kfree(q->master);
-}
-
 static struct xt_match quota_mt_reg __read_mostly = {
        .name       = "quota",
        .revision   = 0,
        .family     = NFPROTO_UNSPEC,
        .match      = quota_mt,
        .checkentry = quota_mt_check,
-       .destroy    = quota_mt_destroy,
        .matchsize  = sizeof(struct xt_quota_info),
-       .usersize   = offsetof(struct xt_quota_info, master),
        .me         = THIS_MODULE,
 };
 
index 35ae64cbef33fa733c3ffb98bffcdbae78c2273a..6bec37ab4472796ecd1f453966b27bb911bf8fa8 100644 (file)
@@ -933,6 +933,11 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
        struct nf_conn *ct;
 
        if (!cached) {
+               struct nf_hook_state state = {
+                       .hook = NF_INET_PRE_ROUTING,
+                       .pf = info->family,
+                       .net = net,
+               };
                struct nf_conn *tmpl = info->ct;
                int err;
 
@@ -944,8 +949,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
                        nf_ct_set(skb, tmpl, IP_CT_NEW);
                }
 
-               err = nf_conntrack_in(net, info->family,
-                                     NF_INET_PRE_ROUTING, skb);
+               err = nf_conntrack_in(skb, &state);
                if (err != NF_ACCEPT)
                        return -ENOENT;