/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */

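/* Per-qdisc state: user configuration (parms), the EWMA averaging state
 * (vars), drop/mark counters (stats), the timer driving adaptive RED, and
 * the byte-limited bfifo child qdisc that actually holds the packets.
 */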
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;
	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

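/* Enqueue path: update the average queue size from the child's backlog and
 * let red_action() decide whether to accept the packet, mark it with a
 * probability that grows between qth_min and qth_max, or mark it
 * unconditionally above qth_max.  "Marking" means ECN-marking when
 * TC_RED_ECN is set and the packet is ECN-capable, otherwise dropping;
 * TC_RED_HARDDROP forces a drop even for ECN-capable packets once the hard
 * threshold is crossed.
 */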
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

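/* Dequeue from the child qdisc.  When the child runs empty, note the start
 * of an idle period so red_calc_qavg() can decay the average queue size
 * while the link is idle instead of freezing it at its last value.
 */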
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

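/* Try to install (TC_RED_REPLACE) or remove (TC_RED_DESTROY) the RED
 * configuration in hardware via ndo_setup_tc().  TCQ_F_OFFLOADED is set
 * only when the driver accepted the replace command; the thresholds are
 * converted back from their scaled representation to byte values before
 * being passed down.
 */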
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.is_ecn = red_use_ecn(q);
	} else {
		opt.command = TC_RED_DESTROY;
	}

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);

	if (!err && enable)
		sch->flags |= TCQ_F_OFFLOADED;
	else
		sch->flags &= ~TCQ_F_OFFLOADED;

	return err;
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_destroy(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};

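/* Configure or reconfigure the qdisc: parse TCA_RED_PARMS/TCA_RED_STAB
 * (and the optional TCA_RED_MAX_P), validate the thresholds, build a
 * byte-limited bfifo child sized to ctl->limit, then swap the parameters
 * and child in under the tree lock before pushing the new setup to
 * hardware.
 */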
static int red_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	red_offload(sch, true);
	return 0;
}

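/* Adaptive RED: every 500 ms re-tune max_P from the observed average queue
 * size (red_adaptative_algo()), holding the root qdisc lock so the update
 * does not race with the datapath.
 */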
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt);
}

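/* When the qdisc is offloaded, ask the driver to refresh bstats/qstats in
 * place (TC_RED_STATS) so the dump reflects what the hardware has done.
 */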
static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	if (!(sch->flags & TCQ_F_OFFLOADED))
		return 0;

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					     &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	sch->qstats.backlog = q->qdisc->qstats.backlog;
	err = red_dump_offload_stats(sch, &opt);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

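/* Extended statistics: early (RED) drops, tail drops and ECN marks.  If the
 * qdisc is offloaded, fetch the hardware counters (TC_RED_XSTATS) and add
 * them to the software ones before copying the result to userspace.
 */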
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
	};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct red_stats hw_stats = {0};
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &hw_stats,
			},
		};
		if (!dev->netdev_ops->ndo_setup_tc(dev,
						   TC_SETUP_QDISC_RED,
						   &hw_stats_request)) {
			st.early += hw_stats.prob_drop + hw_stats.forced_drop;
			st.pdrop += hw_stats.pdrop;
			st.other += hw_stats.other;
			st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
		}
	}

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

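/* RED has exactly one pseudo-class (the slot holding the child qdisc), so
 * the walker reports a single class with handle minor 1.
 */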
static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");