diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 4a042abf844c0b1907e7dcec6234234898bcdcc2..234afbf9115b7fbb7713fe12f74ea099fb69e8e2 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
+#include <net/pkt_cls.h>
 #include <net/pkt_sched.h>
 #include <net/red.h>
 
 #define GRED_DEF_PRIO (MAX_DPs / 2)
 #define GRED_VQ_MASK (MAX_DPs - 1)
 
+#define GRED_VQ_RED_FLAGS      (TC_RED_ECN | TC_RED_HARDDROP)
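+/* Only the ECN and harddrop bits may be set per virtual queue;
+ * gred_vq_validate() rejects any other flag in TCA_GRED_VQ_FLAGS.
+ */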
+
 struct gred_sched_data;
 struct gred_sched;
 
 struct gred_sched_data {
        u32             limit;          /* HARD maximal queue length    */
        u32             DP;             /* the drop parameters */
-       u32             bytesin;        /* bytes seen on virtualQ so far*/
+       u32             red_flags;      /* virtualQ version of red_flags */
+       u64             bytesin;        /* bytes seen on virtualQ so far*/
        u32             packetsin;      /* packets seen on virtualQ so far*/
        u32             backlog;        /* bytes on the virtualQ */
        u8              prio;           /* the prio of this vq */
@@ -139,14 +143,27 @@ static inline void gred_store_wred_set(struct gred_sched *table,
        table->wred_set.qidlestart = q->vars.qidlestart;
 }
 
-static inline int gred_use_ecn(struct gred_sched *t)
+static int gred_use_ecn(struct gred_sched_data *q)
+{
+       return q->red_flags & TC_RED_ECN;
+}
+
+static int gred_use_harddrop(struct gred_sched_data *q)
 {
-       return t->red_flags & TC_RED_ECN;
+       return q->red_flags & TC_RED_HARDDROP;
 }
 
-static inline int gred_use_harddrop(struct gred_sched *t)
+static bool gred_per_vq_red_flags_used(struct gred_sched *table)
 {
-       return t->red_flags & TC_RED_HARDDROP;
+       unsigned int i;
+
+       /* Local per-vq flags couldn't have been set unless global flags are 0 */
+       if (table->red_flags)
+               return false;
+       for (i = 0; i < MAX_DPs; i++)
+               if (table->tab[i] && table->tab[i]->red_flags)
+                       return true;
+       return false;
 }
 
 static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -212,7 +229,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
        case RED_PROB_MARK:
                qdisc_qstats_overlimit(sch);
-               if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
+               if (!gred_use_ecn(q) || !INET_ECN_set_ce(skb)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }
@@ -222,7 +239,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
        case RED_HARD_MARK:
                qdisc_qstats_overlimit(sch);
-               if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
+               if (gred_use_harddrop(q) || !gred_use_ecn(q) ||
                    !INET_ECN_set_ce(skb)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
@@ -295,15 +312,103 @@ static void gred_reset(struct Qdisc *sch)
        }
 }
 
+static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
+{
+       struct gred_sched *table = qdisc_priv(sch);
+       struct net_device *dev = qdisc_dev(sch);
+       struct tc_gred_qopt_offload opt = {
+               .command        = command,
+               .handle         = sch->handle,
+               .parent         = sch->parent,
+       };
+
+       if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+               return;
+
+       if (command == TC_GRED_REPLACE) {
+               unsigned int i;
+
+               opt.set.grio_on = gred_rio_mode(table);
+               opt.set.wred_on = gred_wred_mode(table);
+               opt.set.dp_cnt = table->DPs;
+               opt.set.dp_def = table->def;
+
+               for (i = 0; i < table->DPs; i++) {
+                       struct gred_sched_data *q = table->tab[i];
+
+                       if (!q)
+                               continue;
+                       opt.set.tab[i].present = true;
+                       opt.set.tab[i].limit = q->limit;
+                       opt.set.tab[i].prio = q->prio;
+                       opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
+                       opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
+                       opt.set.tab[i].is_ecn = gred_use_ecn(q);
+                       opt.set.tab[i].is_harddrop = gred_use_harddrop(q);
+                       opt.set.tab[i].probability = q->parms.max_P;
+                       opt.set.tab[i].backlog = &q->backlog;
+               }
+               opt.set.qstats = &sch->qstats;
+       }
+
+       dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt);
+}
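+
+/* The device side of the offload is a TC_SETUP_QDISC_GRED handler in the
+ * driver's ndo_setup_tc callback. A minimal sketch, assuming a hypothetical
+ * driver "foo" (the foo_gred_* helpers are illustrative, not a real API):
+ *
+ *	static int foo_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ *				void *type_data)
+ *	{
+ *		struct tc_gred_qopt_offload *opt = type_data;
+ *
+ *		if (type != TC_SETUP_QDISC_GRED)
+ *			return -EOPNOTSUPP;
+ *
+ *		switch (opt->command) {
+ *		case TC_GRED_REPLACE:
+ *			return foo_gred_replace(dev, &opt->set);
+ *		case TC_GRED_DESTROY:
+ *			foo_gred_destroy(dev, opt->handle);
+ *			return 0;
+ *		case TC_GRED_STATS:
+ *			return foo_gred_stats(dev, &opt->stats);
+ *		default:
+ *			return -EOPNOTSUPP;
+ *		}
+ *	}
+ */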
+
+static int gred_offload_dump_stats(struct Qdisc *sch)
+{
+       struct gred_sched *table = qdisc_priv(sch);
+       struct tc_gred_qopt_offload *hw_stats;
+       unsigned int i;
+       int ret;
+
+       hw_stats = kzalloc(sizeof(*hw_stats), GFP_KERNEL);
+       if (!hw_stats)
+               return -ENOMEM;
+
+       hw_stats->command = TC_GRED_STATS;
+       hw_stats->handle = sch->handle;
+       hw_stats->parent = sch->parent;
+
+       for (i = 0; i < MAX_DPs; i++)
+               if (table->tab[i])
+                       hw_stats->stats.xstats[i] = &table->tab[i]->stats;
+
+       ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
+       /* Even if the driver returns a failure, adjust the stats - in case
+        * offload ended but the driver still wants to adjust the values.
+        */
+       for (i = 0; i < MAX_DPs; i++) {
+               if (!table->tab[i])
+                       continue;
+               table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets;
+               table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes;
+               table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;
+
+               _bstats_update(&sch->bstats,
+                              hw_stats->stats.bstats[i].bytes,
+                              hw_stats->stats.bstats[i].packets);
+               sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
+               sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
+               sch->qstats.drops += hw_stats->stats.qstats[i].drops;
+               sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
+               sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
+       }
+
+       kfree(hw_stats);
+       return ret;
+}
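+
+/* Note the "+=" above: a driver answering TC_GRED_STATS is expected to
+ * report counter deltas accumulated since the previous stats request,
+ * since the reported values are folded into the software counters.
+ */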
+
 static inline void gred_destroy_vq(struct gred_sched_data *q)
 {
        kfree(q);
 }
 
-static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
+static int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps,
+                                struct netlink_ext_ack *extack)
 {
        struct gred_sched *table = qdisc_priv(sch);
        struct tc_gred_sopt *sopt;
+       bool red_flags_changed;
        int i;
 
        if (!dps)
@@ -311,13 +416,28 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
 
        sopt = nla_data(dps);
 
-       if (sopt->DPs > MAX_DPs || sopt->DPs == 0 ||
-           sopt->def_DP >= sopt->DPs)
+       if (sopt->DPs > MAX_DPs) {
+               NL_SET_ERR_MSG_MOD(extack, "number of virtual queues too high");
+               return -EINVAL;
+       }
+       if (sopt->DPs == 0) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "number of virtual queues can't be 0");
+               return -EINVAL;
+       }
+       if (sopt->def_DP >= sopt->DPs) {
+               NL_SET_ERR_MSG_MOD(extack, "default virtual queue above virtual queue count");
                return -EINVAL;
+       }
+       if (sopt->flags && gred_per_vq_red_flags_used(table)) {
+               NL_SET_ERR_MSG_MOD(extack, "can't set per-Qdisc RED flags when per-virtual queue flags are used");
+               return -EINVAL;
+       }
 
        sch_tree_lock(sch);
        table->DPs = sopt->DPs;
        table->def = sopt->def_DP;
+       red_flags_changed = table->red_flags != sopt->flags;
        table->red_flags = sopt->flags;
 
        /*
@@ -337,6 +457,12 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
                gred_disable_wred_mode(table);
        }
 
+       if (red_flags_changed)
+               for (i = 0; i < table->DPs; i++)
+                       if (table->tab[i])
+                               table->tab[i]->red_flags =
+                                       table->red_flags & GRED_VQ_RED_FLAGS;
+
        for (i = table->DPs; i < MAX_DPs; i++) {
                if (table->tab[i]) {
                        pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n",
@@ -346,25 +472,30 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
                }
        }
 
+       gred_offload(sch, TC_GRED_REPLACE);
        return 0;
 }
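+
+/* The offload state is replaced wholesale: every accepted configuration
+ * change re-sends the complete table via TC_GRED_REPLACE instead of
+ * issuing incremental updates.
+ */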
 
 static inline int gred_change_vq(struct Qdisc *sch, int dp,
                                 struct tc_gred_qopt *ctl, int prio,
                                 u8 *stab, u32 max_P,
-                                struct gred_sched_data **prealloc)
+                                struct gred_sched_data **prealloc,
+                                struct netlink_ext_ack *extack)
 {
        struct gred_sched *table = qdisc_priv(sch);
        struct gred_sched_data *q = table->tab[dp];
 
-       if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+       if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) {
+               NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters");
                return -EINVAL;
+       }
 
        if (!q) {
                table->tab[dp] = q = *prealloc;
                *prealloc = NULL;
                if (!q)
                        return -ENOMEM;
+               q->red_flags = table->red_flags & GRED_VQ_RED_FLAGS;
        }
 
        q->DP = dp;
@@ -384,14 +515,127 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
        return 0;
 }
 
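+/* Per-virtual-queue attributes are nested as follows:
+ *
+ *	TCA_GRED_VQ_LIST
+ *	  TCA_GRED_VQ_ENTRY
+ *	    TCA_GRED_VQ_DP	(u32)
+ *	    TCA_GRED_VQ_FLAGS	(u32)
+ *	    TCA_GRED_VQ_STAT_*	(u32/u64, dump only)
+ *	  TCA_GRED_VQ_ENTRY
+ *	    ...
+ */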
+static const struct nla_policy gred_vq_policy[TCA_GRED_VQ_MAX + 1] = {
+       [TCA_GRED_VQ_DP]        = { .type = NLA_U32 },
+       [TCA_GRED_VQ_FLAGS]     = { .type = NLA_U32 },
+};
+
+static const struct nla_policy gred_vqe_policy[TCA_GRED_VQ_ENTRY_MAX + 1] = {
+       [TCA_GRED_VQ_ENTRY]     = { .type = NLA_NESTED },
+};
+
 static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
        [TCA_GRED_PARMS]        = { .len = sizeof(struct tc_gred_qopt) },
        [TCA_GRED_STAB]         = { .len = 256 },
        [TCA_GRED_DPS]          = { .len = sizeof(struct tc_gred_sopt) },
        [TCA_GRED_MAX_P]        = { .type = NLA_U32 },
        [TCA_GRED_LIMIT]        = { .type = NLA_U32 },
+       [TCA_GRED_VQ_LIST]      = { .type = NLA_NESTED },
 };
 
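+/* The apply stage runs under sch_tree_lock() after gred_vqs_validate() has
+ * accepted the message, so re-parsing cannot fail here and the return value
+ * of nla_parse_nested() is deliberately ignored.
+ */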
+static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry)
+{
+       struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
+       u32 dp;
+
+       nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, NULL);
+
+       dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);
+
+       if (tb[TCA_GRED_VQ_FLAGS])
+               table->tab[dp]->red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);
+}
+
+static void gred_vqs_apply(struct gred_sched *table, struct nlattr *vqs)
+{
+       const struct nlattr *attr;
+       int rem;
+
+       nla_for_each_nested(attr, vqs, rem) {
+               switch (nla_type(attr)) {
+               case TCA_GRED_VQ_ENTRY:
+                       gred_vq_apply(table, attr);
+                       break;
+               }
+       }
+}
+
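+/* @cdp is the index of the virtual queue being configured by the current
+ * message; it is the only VQ that may legitimately not exist yet when its
+ * entry is validated.
+ */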
+static int gred_vq_validate(struct gred_sched *table, u32 cdp,
+                           const struct nlattr *entry,
+                           struct netlink_ext_ack *extack)
+{
+       struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
+       int err;
+       u32 dp;
+
+       err = nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy,
+                              extack);
+       if (err < 0)
+               return err;
+
+       if (!tb[TCA_GRED_VQ_DP]) {
+               NL_SET_ERR_MSG_MOD(extack, "Virtual queue with no index specified");
+               return -EINVAL;
+       }
+       dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);
+       if (dp >= table->DPs) {
+               NL_SET_ERR_MSG_MOD(extack, "Virtual queue with index out of bounds");
+               return -EINVAL;
+       }
+       if (dp != cdp && !table->tab[dp]) {
+               NL_SET_ERR_MSG_MOD(extack, "Virtual queue not yet instantiated");
+               return -EINVAL;
+       }
+
+       if (tb[TCA_GRED_VQ_FLAGS]) {
+               u32 red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);
+
+               if (table->red_flags && table->red_flags != red_flags) {
+                       NL_SET_ERR_MSG_MOD(extack, "can't change per-virtual queue RED flags when per-Qdisc flags are used");
+                       return -EINVAL;
+               }
+               if (red_flags & ~GRED_VQ_RED_FLAGS) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "invalid RED flags specified");
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+static int gred_vqs_validate(struct gred_sched *table, u32 cdp,
+                            struct nlattr *vqs, struct netlink_ext_ack *extack)
+{
+       const struct nlattr *attr;
+       int rem, err;
+
+       err = nla_validate_nested(vqs, TCA_GRED_VQ_ENTRY_MAX,
+                                 gred_vqe_policy, extack);
+       if (err < 0)
+               return err;
+
+       nla_for_each_nested(attr, vqs, rem) {
+               switch (nla_type(attr)) {
+               case TCA_GRED_VQ_ENTRY:
+                       err = gred_vq_validate(table, cdp, attr, extack);
+                       if (err)
+                               return err;
+                       break;
+               default:
+                       NL_SET_ERR_MSG_MOD(extack, "GRED_VQ_LIST can contain only entry attributes");
+                       return -EINVAL;
+               }
+       }
+
+       if (rem > 0) {
+               NL_SET_ERR_MSG_MOD(extack, "Trailing data after parsing virtual queue list");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int gred_change(struct Qdisc *sch, struct nlattr *opt,
                       struct netlink_ext_ack *extack)
 {
@@ -406,29 +650,39 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
        if (opt == NULL)
                return -EINVAL;
 
-       err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
+       err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack);
        if (err < 0)
                return err;
 
        if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
                if (tb[TCA_GRED_LIMIT] != NULL)
                        sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
-               return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
+               return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
        }
 
        if (tb[TCA_GRED_PARMS] == NULL ||
            tb[TCA_GRED_STAB] == NULL ||
-           tb[TCA_GRED_LIMIT] != NULL)
+           tb[TCA_GRED_LIMIT] != NULL) {
+               NL_SET_ERR_MSG_MOD(extack, "can't configure Qdisc and virtual queues at the same time");
                return -EINVAL;
+       }
 
        max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
 
-       err = -EINVAL;
        ctl = nla_data(tb[TCA_GRED_PARMS]);
        stab = nla_data(tb[TCA_GRED_STAB]);
 
-       if (ctl->DP >= table->DPs)
-               goto errout;
+       if (ctl->DP >= table->DPs) {
+               NL_SET_ERR_MSG_MOD(extack, "virtual queue index above virtual queue count");
+               return -EINVAL;
+       }
+
+       if (tb[TCA_GRED_VQ_LIST]) {
+               err = gred_vqs_validate(table, ctl->DP, tb[TCA_GRED_VQ_LIST],
+                                       extack);
+               if (err)
+                       return err;
+       }
 
        if (gred_rio_mode(table)) {
                if (ctl->prio == 0) {
@@ -448,9 +702,13 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
        prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
        sch_tree_lock(sch);
 
-       err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc);
+       err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc,
+                            extack);
        if (err < 0)
-               goto errout_locked;
+               goto err_unlock_free;
+
+       if (tb[TCA_GRED_VQ_LIST])
+               gred_vqs_apply(table, tb[TCA_GRED_VQ_LIST]);
 
        if (gred_rio_mode(table)) {
                gred_disable_wred_mode(table);
@@ -458,12 +716,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
                        gred_enable_wred_mode(table);
        }
 
-       err = 0;
+       sch_tree_unlock(sch);
+       kfree(prealloc);
+
+       gred_offload(sch, TC_GRED_REPLACE);
+       return 0;
 
-errout_locked:
+err_unlock_free:
        sch_tree_unlock(sch);
        kfree(prealloc);
-errout:
        return err;
 }
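+
+/* Example iproute2 usage; the trailing "ecn" assumes an iproute2 recent
+ * enough to emit TCA_GRED_VQ_LIST (~v5.0):
+ *
+ *	tc qdisc add dev eth0 root gred setup DPs 3 default 2 grio
+ *	tc qdisc change dev eth0 root gred limit 60KB min 15KB max 45KB \
+ *		burst 20 avpkt 1000 bandwidth 10Mbit DP 2 probability 0.02 \
+ *		prio 2 ecn
+ */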
 
@@ -476,12 +737,15 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt,
        if (!opt)
                return -EINVAL;
 
-       err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
+       err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack);
        if (err < 0)
                return err;
 
-       if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
+       if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "virtual queue configuration can't be specified at initialization time");
                return -EINVAL;
+       }
 
        if (tb[TCA_GRED_LIMIT])
                sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
@@ -489,13 +753,13 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt,
                sch->limit = qdisc_dev(sch)->tx_queue_len
                             * psched_mtu(qdisc_dev(sch));
 
-       return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
+       return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
 }
 
 static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
        struct gred_sched *table = qdisc_priv(sch);
-       struct nlattr *parms, *opts = NULL;
+       struct nlattr *parms, *vqs, *opts = NULL;
        int i;
        u32 max_p[MAX_DPs];
        struct tc_gred_sopt sopt = {
@@ -505,6 +769,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
                .flags  = table->red_flags,
        };
 
+       if (gred_offload_dump_stats(sch))
+               goto nla_put_failure;
+
        opts = nla_nest_start(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;
@@ -522,6 +789,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
        if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
                goto nla_put_failure;
 
+       /* Old style all-in-one dump of VQs */
        parms = nla_nest_start(skb, TCA_GRED_PARMS);
        if (parms == NULL)
                goto nla_put_failure;
@@ -572,6 +840,58 @@ append_opt:
 
        nla_nest_end(skb, parms);
 
+       /* Dump the VQs again, in a more structured way */
+       vqs = nla_nest_start(skb, TCA_GRED_VQ_LIST);
+       if (!vqs)
+               goto nla_put_failure;
+
+       for (i = 0; i < MAX_DPs; i++) {
+               struct gred_sched_data *q = table->tab[i];
+               struct nlattr *vq;
+
+               if (!q)
+                       continue;
+
+               vq = nla_nest_start(skb, TCA_GRED_VQ_ENTRY);
+               if (!vq)
+                       goto nla_put_failure;
+
+               if (nla_put_u32(skb, TCA_GRED_VQ_DP, q->DP))
+                       goto nla_put_failure;
+
+               if (nla_put_u32(skb, TCA_GRED_VQ_FLAGS, q->red_flags))
+                       goto nla_put_failure;
+
+               /* Stats */
+               if (nla_put_u64_64bit(skb, TCA_GRED_VQ_STAT_BYTES, q->bytesin,
+                                     TCA_GRED_VQ_PAD))
+                       goto nla_put_failure;
+               if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PACKETS, q->packetsin))
+                       goto nla_put_failure;
+               if (nla_put_u32(skb, TCA_GRED_VQ_STAT_BACKLOG,
+                               gred_backlog(table, q, sch)))
+                       goto nla_put_failure;
+               if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_DROP,
+                               q->stats.prob_drop))
+                       goto nla_put_failure;
+               if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_MARK,
+                               q->stats.prob_mark))
+                       goto nla_put_failure;
+               if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_DROP,
+                               q->stats.forced_drop))
+                       goto nla_put_failure;
+               if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_MARK,
+                               q->stats.forced_mark))
+                       goto nla_put_failure;
+               if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop))
+                       goto nla_put_failure;
+               if (nla_put_u32(skb, TCA_GRED_VQ_STAT_OTHER, q->stats.other))
+                       goto nla_put_failure;
+
+               nla_nest_end(skb, vq);
+       }
+       nla_nest_end(skb, vqs);
+
        return nla_nest_end(skb, opts);
 
 nla_put_failure:
@@ -588,6 +908,7 @@ static void gred_destroy(struct Qdisc *sch)
                if (table->tab[i])
                        gred_destroy_vq(table->tab[i]);
        }
+       gred_offload(sch, TC_GRED_DESTROY);
 }
 
 static struct Qdisc_ops gred_qdisc_ops __read_mostly = {