net: move the nat function to nf_nat_ovs for ovs and tc
author: Xin Long <lucien.xin@gmail.com>
Thu, 8 Dec 2022 16:56:12 +0000 (11:56 -0500)
committer: David S. Miller <davem@davemloft.net>
Mon, 12 Dec 2022 10:14:03 +0000 (10:14 +0000)
Two pairs of NAT functions are nearly the same in the OVS and TC
code: (ovs_)ct_nat_execute() and ovs_ct_nat()/tcf_ct_act_nat().

This patch creates nf_nat_ovs.c under netfilter and moves them
there then exports nf_ct_nat() so that it can be shared by both
OVS and TC, and keeps the nat (type) check and nat flag update
in OVS and TC's own place, as these parts are different between
OVS and TC.

Note that the OVS nat function used skb->protocol to get the
proto, as OVS already skips vlans in key_extract(), while TC does
not and has to call skb_protocol() to get the proto.
So nf_ct_nat_execute() keeps using skb_protocol(), which
works for both OVS and TC conntrack.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Aaron Conole <aconole@redhat.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/netfilter/nf_nat.h
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/nf_nat_ovs.c [new file with mode: 0644]
net/openvswitch/Kconfig
net/openvswitch/conntrack.c
net/sched/Kconfig
net/sched/act_ct.c

index e9eb01e99d2f94384712db1138ec92e00b95307a..9877f064548a2350143430f2919da57a77acb2ff 100644 (file)
@@ -104,6 +104,10 @@ unsigned int
 nf_nat_inet_fn(void *priv, struct sk_buff *skb,
               const struct nf_hook_state *state);
 
+int nf_ct_nat(struct sk_buff *skb, struct nf_conn *ct,
+             enum ip_conntrack_info ctinfo, int *action,
+             const struct nf_nat_range2 *range, bool commit);
+
 static inline int nf_nat_initialized(const struct nf_conn *ct,
                                     enum nf_nat_manip_type manip)
 {
index 0846bd75b1dab90a5fb2f6c04dcc5ca116f61c5d..f71b41c7ce2ff6c04bd82b8d0303bc7993046fb7 100644 (file)
@@ -459,6 +459,9 @@ config NF_NAT_REDIRECT
 config NF_NAT_MASQUERADE
        bool
 
+config NF_NAT_OVS
+       bool
+
 config NETFILTER_SYNPROXY
        tristate
 
index 1d4db1943936f3ea4958e6d69428d59fcce5a3c2..3754eb06fb41d3551b434d6f237099b5db01bad1 100644 (file)
@@ -59,6 +59,7 @@ obj-$(CONFIG_NF_LOG_SYSLOG) += nf_log_syslog.o
 obj-$(CONFIG_NF_NAT) += nf_nat.o
 nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
 nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o
+nf_nat-$(CONFIG_NF_NAT_OVS) += nf_nat_ovs.o
 
 ifeq ($(CONFIG_NF_NAT),m)
 nf_nat-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_nat_bpf.o
diff --git a/net/netfilter/nf_nat_ovs.c b/net/netfilter/nf_nat_ovs.c
new file mode 100644 (file)
index 0000000..551abd2
--- /dev/null
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Support nat functions for openvswitch and used by OVS and TC conntrack. */
+
+#include <net/netfilter/nf_nat.h>
+
+/* Modelled after nf_nat_ipv[46]_fn().  range is only used for new,
+ * uninitialized NAT state.  Returns either NF_ACCEPT or NF_DROP; on
+ * NF_ACCEPT, ICMP error translation included, sets BIT(maniptype) in *action.
+ */
+static int nf_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
+                            enum ip_conntrack_info ctinfo, int *action,
+                            const struct nf_nat_range2 *range,
+                            enum nf_nat_manip_type maniptype)
+{
+       __be16 proto = skb_protocol(skb, true);
+       int hooknum, err = NF_ACCEPT;
+
+       /* See HOOK2MANIP(). */
+       if (maniptype == NF_NAT_MANIP_SRC)
+               hooknum = NF_INET_LOCAL_IN; /* Source NAT */
+       else
+               hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
+
+       switch (ctinfo) {
+       case IP_CT_RELATED:
+       case IP_CT_RELATED_REPLY:
+               if (proto == htons(ETH_P_IP) &&
+                   ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+                       if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+                                                          hooknum))
+                               err = NF_DROP;
+                       goto out;
+               } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) {
+                       __be16 frag_off;
+                       u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+                       int hdrlen = ipv6_skip_exthdr(skb,
+                                                     sizeof(struct ipv6hdr),
+                                                     &nexthdr, &frag_off);
+
+                       if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+                               if (!nf_nat_icmpv6_reply_translation(skb, ct,
+                                                                    ctinfo,
+                                                                    hooknum,
+                                                                    hdrlen))
+                                       err = NF_DROP;
+                               goto out;
+                       }
+               }
+               /* Non-ICMP, fall thru to initialize if needed. */
+               fallthrough;
+       case IP_CT_NEW:
+               /* Seen it before?  This can happen for loopback, retrans,
+                * or local packets.
+                */
+               if (!nf_nat_initialized(ct, maniptype)) {
+                       /* Initialize according to the NAT action. */
+                       err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
+                               /* Action is set up to establish a new
+                                * mapping.
+                                */
+                               ? nf_nat_setup_info(ct, range, maniptype)
+                               : nf_nat_alloc_null_binding(ct, hooknum);
+                       if (err != NF_ACCEPT)
+                               goto out;
+               }
+               break;
+
+       case IP_CT_ESTABLISHED:
+       case IP_CT_ESTABLISHED_REPLY:
+               break;
+
+       default:
+               err = NF_DROP;
+               goto out;
+       }
+
+       err = nf_nat_packet(ct, ctinfo, hooknum, skb);
+out:
+       if (err == NF_ACCEPT) /* every accepted path, ICMP errors included */
+               *action |= BIT(maniptype);
+       return err;
+}
+
+int nf_ct_nat(struct sk_buff *skb, struct nf_conn *ct,
+             enum ip_conntrack_info ctinfo, int *action,
+             const struct nf_nat_range2 *range, bool commit)
+{
+       enum nf_nat_manip_type maniptype;
+       int err, ct_action = *action; /* in: requested BIT(NF_NAT_MANIP_*) */
+
+       *action = 0; /* out: nf_ct_nat_execute() ORs in BIT(maniptype) */
+
+       /* Add NAT extension if not confirmed yet. */
+       if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
+               return NF_DROP;   /* Can't NAT. */
+
+       if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) &&
+           (ctinfo != IP_CT_RELATED || commit)) { /* RELATED: only on commit */
+               /* NAT type is deduced from the tracked connection's status. */
+               if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
+                       /* This is the REPLY direction for a connection
+                        * for which NAT was applied in the forward
+                        * direction.  Do the reverse NAT.
+                        */
+                       maniptype = ct->status & IPS_SRC_NAT
+                               ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
+               else
+                       maniptype = ct->status & IPS_SRC_NAT
+                               ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
+       } else if (ct_action & BIT(NF_NAT_MANIP_SRC)) {
+               maniptype = NF_NAT_MANIP_SRC;
+       } else if (ct_action & BIT(NF_NAT_MANIP_DST)) {
+               maniptype = NF_NAT_MANIP_DST;
+       } else {
+               return NF_ACCEPT; /* Connection is not NATed. */
+       }
+
+       err = nf_ct_nat_execute(skb, ct, ctinfo, action, range, maniptype);
+       if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
+               if (ct->status & IPS_SRC_NAT) { /* both set: run other manip */
+                       if (maniptype == NF_NAT_MANIP_SRC)
+                               maniptype = NF_NAT_MANIP_DST;
+                       else
+                               maniptype = NF_NAT_MANIP_SRC;
+
+                       err = nf_ct_nat_execute(skb, ct, ctinfo, action, range,
+                                               maniptype);
+               } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+                       err = nf_ct_nat_execute(skb, ct, ctinfo, action, NULL,
+                                               NF_NAT_MANIP_SRC); /* no range */
+               }
+       }
+       return err;
+}
+EXPORT_SYMBOL_GPL(nf_ct_nat);
index 15bd287f5cbdbf879791ab873f3c572ec28190da..747d537a3f066358b9aaabaf28cda5798189c546 100644 (file)
@@ -15,6 +15,7 @@ config OPENVSWITCH
        select NET_MPLS_GSO
        select DST_CACHE
        select NET_NSH
+       select NF_NAT_OVS if NF_NAT
        help
          Open vSwitch is a multilayer Ethernet switch targeted at virtualized
          environments.  In addition to supporting a variety of features
index 58c9f0edc3c4210b194884512ab5dce02035887a..c8b137649ca4637af035c41f804189d136538966 100644 (file)
@@ -726,144 +726,27 @@ static void ovs_nat_update_key(struct sw_flow_key *key,
        }
 }
 
-/* Modelled after nf_nat_ipv[46]_fn().
- * range is only used for new, uninitialized NAT state.
- * Returns either NF_ACCEPT or NF_DROP.
- */
-static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
-                             enum ip_conntrack_info ctinfo,
-                             const struct nf_nat_range2 *range,
-                             enum nf_nat_manip_type maniptype, struct sw_flow_key *key)
-{
-       int hooknum, err = NF_ACCEPT;
-
-       /* See HOOK2MANIP(). */
-       if (maniptype == NF_NAT_MANIP_SRC)
-               hooknum = NF_INET_LOCAL_IN; /* Source NAT */
-       else
-               hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
-
-       switch (ctinfo) {
-       case IP_CT_RELATED:
-       case IP_CT_RELATED_REPLY:
-               if (IS_ENABLED(CONFIG_NF_NAT) &&
-                   skb->protocol == htons(ETH_P_IP) &&
-                   ip_hdr(skb)->protocol == IPPROTO_ICMP) {
-                       if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
-                                                          hooknum))
-                               err = NF_DROP;
-                       goto out;
-               } else if (IS_ENABLED(CONFIG_IPV6) &&
-                          skb->protocol == htons(ETH_P_IPV6)) {
-                       __be16 frag_off;
-                       u8 nexthdr = ipv6_hdr(skb)->nexthdr;
-                       int hdrlen = ipv6_skip_exthdr(skb,
-                                                     sizeof(struct ipv6hdr),
-                                                     &nexthdr, &frag_off);
-
-                       if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
-                               if (!nf_nat_icmpv6_reply_translation(skb, ct,
-                                                                    ctinfo,
-                                                                    hooknum,
-                                                                    hdrlen))
-                                       err = NF_DROP;
-                               goto out;
-                       }
-               }
-               /* Non-ICMP, fall thru to initialize if needed. */
-               fallthrough;
-       case IP_CT_NEW:
-               /* Seen it before?  This can happen for loopback, retrans,
-                * or local packets.
-                */
-               if (!nf_nat_initialized(ct, maniptype)) {
-                       /* Initialize according to the NAT action. */
-                       err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
-                               /* Action is set up to establish a new
-                                * mapping.
-                                */
-                               ? nf_nat_setup_info(ct, range, maniptype)
-                               : nf_nat_alloc_null_binding(ct, hooknum);
-                       if (err != NF_ACCEPT)
-                               goto out;
-               }
-               break;
-
-       case IP_CT_ESTABLISHED:
-       case IP_CT_ESTABLISHED_REPLY:
-               break;
-
-       default:
-               err = NF_DROP;
-               goto out;
-       }
-
-       err = nf_nat_packet(ct, ctinfo, hooknum, skb);
-out:
-       /* Update the flow key if NAT successful. */
-       if (err == NF_ACCEPT)
-               ovs_nat_update_key(key, skb, maniptype);
-
-       return err;
-}
-
 /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
 static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
                      const struct ovs_conntrack_info *info,
                      struct sk_buff *skb, struct nf_conn *ct,
                      enum ip_conntrack_info ctinfo)
 {
-       enum nf_nat_manip_type maniptype;
-       int err;
+       int err, action = 0;
 
        if (!(info->nat & OVS_CT_NAT))
                return NF_ACCEPT;
+       if (info->nat & OVS_CT_SRC_NAT)
+               action |= BIT(NF_NAT_MANIP_SRC);
+       if (info->nat & OVS_CT_DST_NAT)
+               action |= BIT(NF_NAT_MANIP_DST);
 
-       /* Add NAT extension if not confirmed yet. */
-       if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
-               return NF_DROP;   /* Can't NAT. */
+       err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit);
 
-       /* Determine NAT type.
-        * Check if the NAT type can be deduced from the tracked connection.
-        * Make sure new expected connections (IP_CT_RELATED) are NATted only
-        * when committing.
-        */
-       if (ctinfo != IP_CT_NEW && ct->status & IPS_NAT_MASK &&
-           (ctinfo != IP_CT_RELATED || info->commit)) {
-               /* NAT an established or related connection like before. */
-               if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
-                       /* This is the REPLY direction for a connection
-                        * for which NAT was applied in the forward
-                        * direction.  Do the reverse NAT.
-                        */
-                       maniptype = ct->status & IPS_SRC_NAT
-                               ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
-               else
-                       maniptype = ct->status & IPS_SRC_NAT
-                               ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
-       } else if (info->nat & OVS_CT_SRC_NAT) {
-               maniptype = NF_NAT_MANIP_SRC;
-       } else if (info->nat & OVS_CT_DST_NAT) {
-               maniptype = NF_NAT_MANIP_DST;
-       } else {
-               return NF_ACCEPT; /* Connection is not NATed. */
-       }
-       err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key);
-
-       if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
-               if (ct->status & IPS_SRC_NAT) {
-                       if (maniptype == NF_NAT_MANIP_SRC)
-                               maniptype = NF_NAT_MANIP_DST;
-                       else
-                               maniptype = NF_NAT_MANIP_SRC;
-
-                       err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
-                                                maniptype, key);
-               } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
-                       err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,
-                                                NF_NAT_MANIP_SRC, key);
-               }
-       }
+       if (action & BIT(NF_NAT_MANIP_SRC))
+               ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC);
+       if (action & BIT(NF_NAT_MANIP_DST))
+               ovs_nat_update_key(key, skb, NF_NAT_MANIP_DST);
 
        return err;
 }
index 4662a6ce8a7e7a91f81dbca1e096e55789f93cd4..777d6b50505cc4b81596e7296f94f5a1cf06a95d 100644 (file)
@@ -977,6 +977,7 @@ config NET_ACT_TUNNEL_KEY
 config NET_ACT_CT
        tristate "connection tracking tc action"
        depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE
+       select NF_NAT_OVS if NF_NAT
        help
          Say Y here to allow sending the packets to conntrack module.
 
index eac4e07eb56b735b945f100ef4eae7758d760d24..0ca2bb8ed02608c815c2669dfbbf4d2bf25aeacd 100644 (file)
@@ -864,90 +864,6 @@ static void tcf_ct_params_free_rcu(struct rcu_head *head)
        tcf_ct_params_free(params);
 }
 
-#if IS_ENABLED(CONFIG_NF_NAT)
-/* Modelled after nf_nat_ipv[46]_fn().
- * range is only used for new, uninitialized NAT state.
- * Returns either NF_ACCEPT or NF_DROP.
- */
-static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
-                         enum ip_conntrack_info ctinfo,
-                         const struct nf_nat_range2 *range,
-                         enum nf_nat_manip_type maniptype)
-{
-       __be16 proto = skb_protocol(skb, true);
-       int hooknum, err = NF_ACCEPT;
-
-       /* See HOOK2MANIP(). */
-       if (maniptype == NF_NAT_MANIP_SRC)
-               hooknum = NF_INET_LOCAL_IN; /* Source NAT */
-       else
-               hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
-
-       switch (ctinfo) {
-       case IP_CT_RELATED:
-       case IP_CT_RELATED_REPLY:
-               if (proto == htons(ETH_P_IP) &&
-                   ip_hdr(skb)->protocol == IPPROTO_ICMP) {
-                       if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
-                                                          hooknum))
-                               err = NF_DROP;
-                       goto out;
-               } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) {
-                       __be16 frag_off;
-                       u8 nexthdr = ipv6_hdr(skb)->nexthdr;
-                       int hdrlen = ipv6_skip_exthdr(skb,
-                                                     sizeof(struct ipv6hdr),
-                                                     &nexthdr, &frag_off);
-
-                       if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
-                               if (!nf_nat_icmpv6_reply_translation(skb, ct,
-                                                                    ctinfo,
-                                                                    hooknum,
-                                                                    hdrlen))
-                                       err = NF_DROP;
-                               goto out;
-                       }
-               }
-               /* Non-ICMP, fall thru to initialize if needed. */
-               fallthrough;
-       case IP_CT_NEW:
-               /* Seen it before?  This can happen for loopback, retrans,
-                * or local packets.
-                */
-               if (!nf_nat_initialized(ct, maniptype)) {
-                       /* Initialize according to the NAT action. */
-                       err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
-                               /* Action is set up to establish a new
-                                * mapping.
-                                */
-                               ? nf_nat_setup_info(ct, range, maniptype)
-                               : nf_nat_alloc_null_binding(ct, hooknum);
-                       if (err != NF_ACCEPT)
-                               goto out;
-               }
-               break;
-
-       case IP_CT_ESTABLISHED:
-       case IP_CT_ESTABLISHED_REPLY:
-               break;
-
-       default:
-               err = NF_DROP;
-               goto out;
-       }
-
-       err = nf_nat_packet(ct, ctinfo, hooknum, skb);
-out:
-       if (err == NF_ACCEPT) {
-               if (maniptype == NF_NAT_MANIP_SRC)
-                       tc_skb_cb(skb)->post_ct_snat = 1;
-               if (maniptype == NF_NAT_MANIP_DST)
-                       tc_skb_cb(skb)->post_ct_dnat = 1;
-       }
-       return err;
-}
-#endif /* CONFIG_NF_NAT */
-
 static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
 {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
@@ -987,52 +903,22 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
                          bool commit)
 {
 #if IS_ENABLED(CONFIG_NF_NAT)
-       int err;
-       enum nf_nat_manip_type maniptype;
+       int err, action = 0;
 
        if (!(ct_action & TCA_CT_ACT_NAT))
                return NF_ACCEPT;
+       if (ct_action & TCA_CT_ACT_NAT_SRC)
+               action |= BIT(NF_NAT_MANIP_SRC);
+       if (ct_action & TCA_CT_ACT_NAT_DST)
+               action |= BIT(NF_NAT_MANIP_DST);
 
-       /* Add NAT extension if not confirmed yet. */
-       if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
-               return NF_DROP;   /* Can't NAT. */
-
-       if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) &&
-           (ctinfo != IP_CT_RELATED || commit)) {
-               /* NAT an established or related connection like before. */
-               if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
-                       /* This is the REPLY direction for a connection
-                        * for which NAT was applied in the forward
-                        * direction.  Do the reverse NAT.
-                        */
-                       maniptype = ct->status & IPS_SRC_NAT
-                               ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
-               else
-                       maniptype = ct->status & IPS_SRC_NAT
-                               ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
-       } else if (ct_action & TCA_CT_ACT_NAT_SRC) {
-               maniptype = NF_NAT_MANIP_SRC;
-       } else if (ct_action & TCA_CT_ACT_NAT_DST) {
-               maniptype = NF_NAT_MANIP_DST;
-       } else {
-               return NF_ACCEPT;
-       }
+       err = nf_ct_nat(skb, ct, ctinfo, &action, range, commit);
+
+       if (action & BIT(NF_NAT_MANIP_SRC))
+               tc_skb_cb(skb)->post_ct_snat = 1;
+       if (action & BIT(NF_NAT_MANIP_DST))
+               tc_skb_cb(skb)->post_ct_dnat = 1;
 
-       err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
-       if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
-               if (ct->status & IPS_SRC_NAT) {
-                       if (maniptype == NF_NAT_MANIP_SRC)
-                               maniptype = NF_NAT_MANIP_DST;
-                       else
-                               maniptype = NF_NAT_MANIP_SRC;
-
-                       err = ct_nat_execute(skb, ct, ctinfo, range,
-                                            maniptype);
-               } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
-                       err = ct_nat_execute(skb, ct, ctinfo, NULL,
-                                            NF_NAT_MANIP_SRC);
-               }
-       }
        return err;
 #else
        return NF_ACCEPT;