net/sched: tunnel_key: Allow to set tos and ttl for tc based ip tunnels
[sfrench/cifs-2.6.git] / net / sched / act_tunnel_key.c
index 626dac81a48a6b2ab97e9d0c786b08989f693288..22f26e9ea8f15b4a569771fb6edcb1ed016e578b 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
+#include <net/geneve.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
@@ -57,6 +58,135 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
        return action;
 }
 
+/*
+ * Policy for the attributes nested inside TCA_TUNNEL_KEY_ENC_OPTS: the only
+ * entry listed is the Geneve option, which is itself a nested attribute.
+ */
+static const struct nla_policy
+enc_opts_policy[TCA_TUNNEL_KEY_ENC_OPTS_MAX + 1] = {
+       [TCA_TUNNEL_KEY_ENC_OPTS_GENEVE] = {
+               .type = NLA_NESTED,
+       },
+};
+
+/*
+ * Policy for the attributes describing a single Geneve option (class, type
+ * and raw option data).
+ *
+ * The length field of struct geneve_opt is 5 bits wide and counts 4-byte
+ * words, so one option can carry at most 31 * 4 = 124 bytes of data.
+ * Allowing 128 bytes here would let a 128-byte option be stored with a
+ * length of 0 (32 truncated to 5 bits), which desynchronises every later
+ * walk over the option buffer.  Capping the attribute at 127 bytes, combined
+ * with the multiple-of-4 check done when the option is copied, keeps the
+ * data within 124 bytes.
+ */
+static const struct nla_policy
+geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = {
+       [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]      = { .type = NLA_U16 },
+       [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]       = { .type = NLA_U8 },
+       [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]       = { .type = NLA_BINARY,
+                                                      .len = 127 },
+};
+
+/*
+ * Parse one TCA_TUNNEL_KEY_ENC_OPTS_GENEVE nest and optionally serialise it.
+ *
+ * @nla:     nested attribute holding the option's class, type and data
+ * @dst:     where to write the struct geneve_opt header followed by the
+ *           option data, or NULL to only validate and measure
+ * @dst_len: bytes available at @dst (ignored when @dst is NULL)
+ * @extack:  extended ack used to report why the option was rejected
+ *
+ * Returns the number of bytes the encoded option occupies (header plus
+ * data), or a negative errno: the parser's error, -EINVAL when class, type
+ * or data is missing or @dst is too small, -ERANGE when the data length is
+ * below 4 bytes, not a multiple of 4, or above 124 bytes.
+ */
+static int
+tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len,
+                          struct netlink_ext_ack *extack)
+{
+       struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1];
+       int err, data_len, opt_len;
+       u8 *data;
+
+       err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX,
+                              nla, geneve_opt_policy, extack);
+       if (err < 0)
+               return err;
+
+       if (!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] ||
+           !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] ||
+           !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]) {
+               NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
+               return -EINVAL;
+       }
+
+       data = nla_data(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
+       data_len = nla_len(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
+       if (data_len < 4) {
+               NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
+               return -ERANGE;
+       }
+       if (data_len % 4) {
+               NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
+               return -ERANGE;
+       }
+       /*
+        * geneve_opt.length is a 5-bit count of 4-byte words.  Anything above
+        * 31 words would be silently truncated when stored below, leaving a
+        * header whose length disagrees with the bytes actually copied.
+        */
+       if (data_len > 31 * 4) {
+               NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is more than 124 bytes long");
+               return -ERANGE;
+       }
+
+       opt_len = sizeof(struct geneve_opt) + data_len;
+       if (dst) {
+               struct geneve_opt *opt = dst;
+
+               /*
+                * The caller sized @dst from an earlier measuring pass, so
+                * this should never trigger; if it does, refuse to write past
+                * the end of the buffer instead of only warning about it.
+                */
+               if (WARN_ON(dst_len < opt_len))
+                       return -EINVAL;
+
+               opt->opt_class =
+                       nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]);
+               opt->type = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]);
+               opt->length = data_len / 4; /* length is in units of 4 bytes */
+               opt->r1 = 0;
+               opt->r2 = 0;
+               opt->r3 = 0;
+
+               /* Option data immediately follows the fixed header. */
+               memcpy(opt + 1, data, data_len);
+       }
+
+       return opt_len;
+}
+
+/*
+ * Walk the attributes nested in TCA_TUNNEL_KEY_ENC_OPTS and encode each
+ * recognised tunnel option.
+ *
+ * @nla:     the TCA_TUNNEL_KEY_ENC_OPTS attribute
+ * @dst:     buffer receiving the encoded options back to back, or NULL to
+ *           only validate the attributes and compute the required size
+ * @dst_len: bytes available at @dst (ignored when @dst is NULL)
+ * @extack:  extended ack used to report why the options were rejected
+ *
+ * Returns the total encoded length in bytes, or a negative errno: the
+ * validator's or per-option parser's error, or -EINVAL when no option was
+ * found, the options do not fit the tunnel info, or trailing bytes remain.
+ */
+static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
+                               int dst_len, struct netlink_ext_ack *extack)
+{
+       int err, rem, opt_len, len = nla_len(nla), opts_len = 0;
+       const struct nlattr *attr, *head = nla_data(nla);
+
+       err = nla_validate(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX,
+                          enc_opts_policy, extack);
+       if (err)
+               return err;
+
+       nla_for_each_attr(attr, head, len, rem) {
+               switch (nla_type(attr)) {
+               case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
+                       opt_len = tunnel_key_copy_geneve_opt(attr, dst,
+                                                            dst_len, extack);
+                       if (opt_len < 0)
+                               return opt_len;
+                       opts_len += opt_len;
+                       /*
+                        * The total is later stored in the tunnel info's
+                        * options_len field and used to size the metadata
+                        * allocation; reject anything that field cannot
+                        * represent rather than letting it be truncated.
+                        */
+                       if (opts_len > IP_TUNNEL_OPTS_MAX) {
+                               NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
+                               return -EINVAL;
+                       }
+                       if (dst) {
+                               /* Advance past the option just written. */
+                               dst_len -= opt_len;
+                               dst += opt_len;
+                       }
+                       break;
+               }
+       }
+
+       if (!opts_len) {
+               NL_SET_ERR_MSG(extack, "Empty list of tunnel options");
+               return -EINVAL;
+       }
+
+       if (rem > 0) {
+               NL_SET_ERR_MSG(extack, "Trailing data after parsing tunnel key options attributes");
+               return -EINVAL;
+       }
+
+       return opts_len;
+}
+
+/*
+ * Measuring pass: run the option parser without a destination buffer, so it
+ * only validates @nla and reports the encoded size of the options.  Returns
+ * whatever tunnel_key_copy_opts() returns for a NULL destination.
+ */
+static int tunnel_key_get_opts_len(struct nlattr *nla,
+                                  struct netlink_ext_ack *extack)
+{
+       u8 *no_dst = NULL;
+       int required;
+
+       required = tunnel_key_copy_opts(nla, no_dst, 0, extack);
+
+       return required;
+}
+
+static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
+                              int opts_len, struct netlink_ext_ack *extack)
+{
+       info->options_len = opts_len;
+       switch (nla_type(nla_data(nla))) {
+       case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
+#if IS_ENABLED(CONFIG_INET)
+               info->key.tun_flags |= TUNNEL_GENEVE_OPT;
+               return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+                                           opts_len, extack);
+#else
+               return -EAFNOSUPPORT;
+#endif
+       default:
+               NL_SET_ERR_MSG(extack, "Cannot set tunnel options for unknown tunnel type");
+               return -EINVAL;
+       }
+}
+
 static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
        [TCA_TUNNEL_KEY_PARMS]      = { .len = sizeof(struct tc_tunnel_key) },
        [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 },
@@ -66,11 +196,15 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
        [TCA_TUNNEL_KEY_ENC_KEY_ID]   = { .type = NLA_U32 },
        [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16},
        [TCA_TUNNEL_KEY_NO_CSUM]      = { .type = NLA_U8 },
+       [TCA_TUNNEL_KEY_ENC_OPTS]     = { .type = NLA_NESTED },
+       [TCA_TUNNEL_KEY_ENC_TOS]      = { .type = NLA_U8 },
+       [TCA_TUNNEL_KEY_ENC_TTL]      = { .type = NLA_U8 },
 };
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, struct netlink_ext_ack *extack)
+                          int ovr, int bind, bool rtnl_held,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
        struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
@@ -81,24 +215,35 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
        struct tcf_tunnel_key *t;
        bool exists = false;
        __be16 dst_port = 0;
+       int opts_len = 0;
        __be64 key_id;
        __be16 flags;
+       u8 tos, ttl;
        int ret = 0;
        int err;
 
-       if (!nla)
+       if (!nla) {
+               NL_SET_ERR_MSG(extack, "Tunnel requires attributes to be passed");
                return -EINVAL;
+       }
 
        err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy,
-                              NULL);
-       if (err < 0)
+                              extack);
+       if (err < 0) {
+               NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes");
                return err;
+       }
 
-       if (!tb[TCA_TUNNEL_KEY_PARMS])
+       if (!tb[TCA_TUNNEL_KEY_PARMS]) {
+               NL_SET_ERR_MSG(extack, "Missing tunnel key parameters");
                return -EINVAL;
+       }
 
        parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
-       exists = tcf_idr_check(tn, parm->index, a, bind);
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (err < 0)
+               return err;
+       exists = err;
        if (exists && bind)
                return 0;
 
@@ -107,6 +252,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                break;
        case TCA_TUNNEL_KEY_ACT_SET:
                if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
+                       NL_SET_ERR_MSG(extack, "Missing tunnel key id");
                        ret = -EINVAL;
                        goto err_out;
                }
@@ -121,6 +267,22 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
                        dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
 
+               if (tb[TCA_TUNNEL_KEY_ENC_OPTS]) {
+                       opts_len = tunnel_key_get_opts_len(tb[TCA_TUNNEL_KEY_ENC_OPTS],
+                                                          extack);
+                       if (opts_len < 0) {
+                               ret = opts_len;
+                               goto err_out;
+                       }
+               }
+
+               tos = 0;
+               if (tb[TCA_TUNNEL_KEY_ENC_TOS])
+                       tos = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TOS]);
+               ttl = 0;
+               if (tb[TCA_TUNNEL_KEY_ENC_TTL])
+                       ttl = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TTL]);
+
                if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
                    tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
                        __be32 saddr;
@@ -129,9 +291,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                        saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
                        daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
 
-                       metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
+                       metadata = __ip_tun_set_dst(saddr, daddr, tos, ttl,
                                                    dst_port, flags,
-                                                   key_id, 0);
+                                                   key_id, opts_len);
                } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
                           tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
                        struct in6_addr saddr;
@@ -140,19 +302,33 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                        saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
                        daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
 
-                       metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, dst_port,
+                       metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port,
                                                      0, flags,
                                                      key_id, 0);
+               } else {
+                       NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst");
+                       ret = -EINVAL;
+                       goto err_out;
                }
 
                if (!metadata) {
-                       ret = -EINVAL;
+                       NL_SET_ERR_MSG(extack, "Cannot allocate tunnel metadata dst");
+                       ret = -ENOMEM;
                        goto err_out;
                }
 
+               if (opts_len) {
+                       ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS],
+                                                 &metadata->u.tun_info,
+                                                 opts_len, extack);
+                       if (ret < 0)
+                               goto err_out;
+               }
+
                metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
                break;
        default:
+               NL_SET_ERR_MSG(extack, "Unknown tunnel key action");
                ret = -EINVAL;
                goto err_out;
        }
@@ -160,14 +336,16 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
        if (!exists) {
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_tunnel_key_ops, bind, true);
-               if (ret)
-                       return ret;
+               if (ret) {
+                       NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
+                       goto err_out;
+               }
 
                ret = ACT_P_CREATED;
-       } else {
+       } else if (!ovr) {
                tcf_idr_release(*a, bind);
-               if (!ovr)
-                       return -EEXIST;
+               NL_SET_ERR_MSG(extack, "TC IDR already exists");
+               return -EEXIST;
        }
 
        t = to_tunnel_key(*a);
@@ -175,8 +353,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
        ASSERT_RTNL();
        params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
        if (unlikely(!params_new)) {
-               if (ret == ACT_P_CREATED)
-                       tcf_idr_release(*a, bind);
+               tcf_idr_release(*a, bind);
+               NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
                return -ENOMEM;
        }
 
@@ -199,6 +377,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 err_out:
        if (exists)
                tcf_idr_release(*a, bind);
+       else
+               tcf_idr_cleanup(tn, parm->index);
        return ret;
 }
 
@@ -216,6 +396,61 @@ static void tunnel_key_release(struct tc_action *a)
        }
 }
 
+/*
+ * Dump the Geneve options stored behind @info into @skb.
+ *
+ * Opens one TCA_TUNNEL_KEY_ENC_OPTS_GENEVE nest and, for every option found
+ * in the @info->options_len bytes that follow @info, emits its class, type
+ * and data attributes.
+ *
+ * Returns 0 on success or -EMSGSIZE when @skb runs out of room; in the
+ * latter case the partially built nest is removed from @skb again.
+ */
+static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
+                                      const struct ip_tunnel_info *info)
+{
+       int len = info->options_len;
+       const u8 *src = (const u8 *)(info + 1);
+       struct nlattr *start;
+
+       start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
+       if (!start)
+               return -EMSGSIZE;
+
+       while (len > 0) {
+               const struct geneve_opt *opt = (const struct geneve_opt *)src;
+               /* opt->length counts 4-byte words of data after the header. */
+               int opt_len = sizeof(struct geneve_opt) + opt->length * 4;
+
+               if (nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS,
+                                opt->opt_class) ||
+                   nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE,
+                              opt->type) ||
+                   nla_put(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA,
+                           opt->length * 4, opt + 1)) {
+                       /* Do not leave a half-written nest in the message. */
+                       nla_nest_cancel(skb, start);
+                       return -EMSGSIZE;
+               }
+
+               len -= opt_len;
+               src += opt_len;
+       }
+
+       nla_nest_end(skb, start);
+       return 0;
+}
+
+/*
+ * Dump the tunnel options of @info, if any, as a TCA_TUNNEL_KEY_ENC_OPTS
+ * nest in @skb.
+ *
+ * Returns 0 when there are no options or they were dumped successfully,
+ * -EMSGSIZE when @skb has no room, the Geneve dumper's error, or -EINVAL
+ * when options are present but TUNNEL_GENEVE_OPT is not set in the tunnel
+ * flags.  On any error after the nest was opened, the nest is cancelled so
+ * no partial attribute is left in @skb.
+ */
+static int tunnel_key_opts_dump(struct sk_buff *skb,
+                               const struct ip_tunnel_info *info)
+{
+       struct nlattr *start;
+       int err = -EINVAL;
+
+       if (!info->options_len)
+               return 0;
+
+       start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS);
+       if (!start)
+               return -EMSGSIZE;
+
+       if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+               err = tunnel_key_geneve_opts_dump(skb, info);
+               if (err)
+                       goto err_out;
+       } else {
+               /* Options of a type this dumper does not know about. */
+               goto err_out;
+       }
+
+       nla_nest_end(skb, start);
+       return 0;
+
+err_out:
+       nla_nest_cancel(skb, start);
+       return err;
+}
+
 static int tunnel_key_dump_addresses(struct sk_buff *skb,
                                     const struct ip_tunnel_info *info)
 {
@@ -252,8 +487,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
        struct tcf_tunnel_key_params *params;
        struct tc_tunnel_key opt = {
                .index    = t->tcf_index,
-               .refcnt   = t->tcf_refcnt - ref,
-               .bindcnt  = t->tcf_bindcnt - bind,
+               .refcnt   = refcount_read(&t->tcf_refcnt) - ref,
+               .bindcnt  = atomic_read(&t->tcf_bindcnt) - bind,
        };
        struct tcf_t tm;
 
@@ -266,8 +501,9 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
                goto nla_put_failure;
 
        if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
-               struct ip_tunnel_key *key =
-                       &params->tcft_enc_metadata->u.tun_info.key;
+               struct ip_tunnel_info *info =
+                       &params->tcft_enc_metadata->u.tun_info;
+               struct ip_tunnel_key *key = &info->key;
                __be32 key_id = tunnel_id_to_key32(key->tun_id);
 
                if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
@@ -275,7 +511,14 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
                                              &params->tcft_enc_metadata->u.tun_info) ||
                    nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst) ||
                    nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM,
-                              !(key->tun_flags & TUNNEL_CSUM)))
+                              !(key->tun_flags & TUNNEL_CSUM)) ||
+                   tunnel_key_opts_dump(skb, info))
+                       goto nla_put_failure;
+
+               if (key->tos && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TOS, key->tos))
+                       goto nla_put_failure;
+
+               if (key->ttl && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TTL, key->ttl))
                        goto nla_put_failure;
        }
 
@@ -309,6 +552,13 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+/*
+ * Delete callback for the action ops: resolve the tunnel_key action table
+ * for @net through net_generic() and hand @index to tcf_idr_delete_index(),
+ * returning its result unchanged.
+ */
+static int tunnel_key_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *action_net;
+
+       action_net = net_generic(net, tunnel_key_net_id);
+       return tcf_idr_delete_index(action_net, index);
+}
+
 static struct tc_action_ops act_tunnel_key_ops = {
        .kind           =       "tunnel_key",
        .type           =       TCA_ACT_TUNNEL_KEY,
@@ -319,6 +569,7 @@ static struct tc_action_ops act_tunnel_key_ops = {
        .cleanup        =       tunnel_key_release,
        .walk           =       tunnel_key_walker,
        .lookup         =       tunnel_key_search,
+       .delete         =       tunnel_key_delete,
        .size           =       sizeof(struct tcf_tunnel_key),
 };