// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c  Random Early Detection queue.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914: computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816: ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*      Parameters, settable by user:
        -----------------------------

        limit           - bytes (must be > qth_max + burst)

        Hard limit on queue length, should be chosen > qth_max
        to allow packet bursts. This parameter does not
        affect the algorithm's behaviour and can be chosen
        arbitrarily high (well, less than RAM size).
        Really, this limit will never be reached
        if RED works correctly.
 */
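
/*      Illustrative sizing sketch (example values only, assuming an
        average packet of roughly 1000 bytes): with qth_min = 30000 bytes
        and qth_max = 90000 bytes, a burst of 55 packets above qth_max
        needs about 90000 + 55 * 1000 = 145000 bytes, so a hard limit
        such as 400000 bytes satisfies limit > qth_max + burst with a
        comfortable margin. From user space this corresponds roughly to
        (exact iproute2 syntax may vary between versions):

            tc qdisc add dev eth0 root red limit 400000 min 30000 \
                max 90000 avpkt 1000 burst 55 bandwidth 10Mbit ecn adaptive
 */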

struct red_sched_data {
        u32                     limit;          /* HARD maximal queue length */
        unsigned char           flags;
        struct timer_list       adapt_timer;
        struct Qdisc            *sch;
        struct red_parms        parms;
        struct red_vars         vars;
        struct red_stats        stats;
        struct Qdisc            *qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
        int ret;

        q->vars.qavg = red_calc_qavg(&q->parms,
                                     &q->vars,
                                     child->qstats.backlog);

        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
        case RED_DONT_MARK:
                break;

        case RED_PROB_MARK:
                qdisc_qstats_overlimit(sch);
                if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                q->stats.prob_mark++;
                break;

        case RED_HARD_MARK:
                qdisc_qstats_overlimit(sch);
                if (red_use_harddrop(q) || !red_use_ecn(q) ||
                    !INET_ECN_set_ce(skb)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }

                q->stats.forced_mark++;
                break;
        }

        ret = qdisc_enqueue(skb, child, to_free);
        if (likely(ret == NET_XMIT_SUCCESS)) {
                qdisc_qstats_backlog_inc(sch, skb);
                sch->q.qlen++;
        } else if (net_xmit_drop_count(ret)) {
                q->stats.pdrop++;
                qdisc_qstats_drop(sch);
        }
        return ret;

congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        skb = child->dequeue(child);
        if (skb) {
                qdisc_bstats_update(sch, skb);
                qdisc_qstats_backlog_dec(sch, skb);
                sch->q.qlen--;
        } else {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
        }
        return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->qstats.backlog = 0;
        sch->q.qlen = 0;
        red_restart(&q->vars);
}

static int red_offload(struct Qdisc *sch, bool enable)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload opt = {
                .handle = sch->handle,
                .parent = sch->parent,
        };

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;

        if (enable) {
                opt.command = TC_RED_REPLACE;
                opt.set.min = q->parms.qth_min >> q->parms.Wlog;
                opt.set.max = q->parms.qth_max >> q->parms.Wlog;
                opt.set.probability = q->parms.max_P;
                opt.set.limit = q->limit;
                opt.set.is_ecn = red_use_ecn(q);
                opt.set.is_harddrop = red_use_harddrop(q);
                opt.set.qstats = &sch->qstats;
        } else {
                opt.command = TC_RED_DESTROY;
        }

        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        del_timer_sync(&q->adapt_timer);
        red_offload(sch, false);
        qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
        [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
        [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
        [TCA_RED_MAX_P] = { .type = NLA_U32 },
};

static int red_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        struct Qdisc *old_child = NULL, *child = NULL;
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_RED_MAX + 1];
        struct tc_red_qopt *ctl;
        int err;
        u32 max_P;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
                                          NULL);
        if (err < 0)
                return err;

        if (tb[TCA_RED_PARMS] == NULL ||
            tb[TCA_RED_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_RED_PARMS]);
        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
                return -EINVAL;

        if (ctl->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
                                         extack);
                if (IS_ERR(child))
                        return PTR_ERR(child);

                /* child is fifo, no need to check for noop_qdisc */
                qdisc_hash_add(child, true);
        }

        sch_tree_lock(sch);
        q->flags = ctl->flags;
        q->limit = ctl->limit;
        if (child) {
                qdisc_tree_flush_backlog(q->qdisc);
                old_child = q->qdisc;
                q->qdisc = child;
        }

        red_set_parms(&q->parms,
                      ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      nla_data(tb[TCA_RED_STAB]),
                      max_P);
        red_set_vars(&q->vars);

        del_timer(&q->adapt_timer);
        if (ctl->flags & TC_RED_ADAPTATIVE)
                mod_timer(&q->adapt_timer, jiffies + HZ/2);

        if (!q->qdisc->q.qlen)
                red_start_of_idle_period(&q->vars);

        sch_tree_unlock(sch);

        red_offload(sch, true);

        if (old_child)
                qdisc_put(old_child);
        return 0;
}

static inline void red_adaptative_timer(struct timer_list *t)
{
        struct red_sched_data *q = from_timer(q, t, adapt_timer);
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

        spin_lock(root_lock);
        red_adaptative_algo(&q->parms, &q->vars);
        mod_timer(&q->adapt_timer, jiffies + HZ/2);
        spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);

        q->qdisc = &noop_qdisc;
        q->sch = sch;
        timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
        return red_change(sch, opt, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
        struct tc_red_qopt_offload hw_stats = {
                .command = TC_RED_STATS,
                .handle = sch->handle,
                .parent = sch->parent,
                {
                        .stats.bstats = &sch->bstats,
                        .stats.qstats = &sch->qstats,
                },
        };

        return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };
        int err;

        err = red_dump_offload_stats(sch);
        if (err)
                goto nla_put_failure;

        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;
        if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_xstats st = {0};

        if (sch->flags & TCQ_F_OFFLOADED) {
                struct tc_red_qopt_offload hw_stats_request = {
                        .command = TC_RED_XSTATS,
                        .handle = sch->handle,
                        .parent = sch->parent,
                        {
                                .xstats = &q->stats,
                        },
                };
                dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
                                              &hw_stats_request);
        }
        st.early = q->stats.prob_drop + q->stats.forced_drop;
        st.pdrop = q->stats.pdrop;
        st.other = q->stats.other;
        st.marked = q->stats.prob_mark + q->stats.forced_mark;

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct red_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;
        return 0;
}

static void red_graft_offload(struct Qdisc *sch,
                              struct Qdisc *new, struct Qdisc *old,
                              struct netlink_ext_ack *extack)
{
        struct tc_red_qopt_offload graft_offload = {
                .handle         = sch->handle,
                .parent         = sch->parent,
                .child_handle   = new->handle,
                .command        = TC_RED_GRAFT,
        };

        qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
                                   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        *old = qdisc_replace(sch, new, &q->qdisc);

        red_graft_offload(sch, new, *old, extack);
        return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct red_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops red_class_ops = {
        .graft          =       red_graft,
        .leaf           =       red_leaf,
        .find           =       red_find,
        .walk           =       red_walk,
        .dump           =       red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
        .id             =       "red",
        .priv_size      =       sizeof(struct red_sched_data),
        .cl_ops         =       &red_class_ops,
        .enqueue        =       red_enqueue,
        .dequeue        =       red_dequeue,
        .peek           =       red_peek,
        .init           =       red_init,
        .reset          =       red_reset,
        .destroy        =       red_destroy,
        .change         =       red_change,
        .dump           =       red_dump,
        .dump_stats     =       red_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init red_module_init(void)
{
        return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
        unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");