Merge git://git.kernel.org/pub/scm/linux/kernel/git/sam/kbuild
[sfrench/cifs-2.6.git] / net / sched / act_police.c
1 /*
2  * net/sched/act_police.c   Input police filter.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *              J Hadi Salim (action changes)
11  */
12
13 #include <asm/uaccess.h>
14 #include <asm/system.h>
15 #include <linux/bitops.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/sched.h>
20 #include <linux/string.h>
21 #include <linux/mm.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
24 #include <linux/in.h>
25 #include <linux/errno.h>
26 #include <linux/interrupt.h>
27 #include <linux/netdevice.h>
28 #include <linux/skbuff.h>
29 #include <linux/module.h>
30 #include <linux/rtnetlink.h>
31 #include <linux/init.h>
32 #include <net/sock.h>
33 #include <net/act_api.h>
34
35 #define L2T(p,L)   ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
36 #define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
37 #define PRIV(a) ((struct tcf_police *) (a)->priv)
38
39 /* use generic hash table */
40 #define MY_TAB_SIZE     16
41 #define MY_TAB_MASK     15
42 static u32 idx_gen;
43 static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
44 /* Policer hash table lock */
45 static DEFINE_RWLOCK(police_lock);
46
47 /* Each policer is serialized by its individual spinlock */
48
49 static __inline__ unsigned tcf_police_hash(u32 index)
50 {
51         return index&0xF;
52 }
53
54 static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
55 {
56         struct tcf_police *p;
57
58         read_lock(&police_lock);
59         for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
60                 if (p->index == index)
61                         break;
62         }
63         read_unlock(&police_lock);
64         return p;
65 }
66
67 #ifdef CONFIG_NET_CLS_ACT
68 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
69                               int type, struct tc_action *a)
70 {
71         struct tcf_police *p;
72         int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
73         struct rtattr *r;
74
75         read_lock(&police_lock);
76
77         s_i = cb->args[0];
78
79         for (i = 0; i < MY_TAB_SIZE; i++) {
80                 p = tcf_police_ht[tcf_police_hash(i)];
81
82                 for (; p; p = p->next) {
83                         index++;
84                         if (index < s_i)
85                                 continue;
86                         a->priv = p;
87                         a->order = index;
88                         r = (struct rtattr*) skb->tail;
89                         RTA_PUT(skb, a->order, 0, NULL);
90                         if (type == RTM_DELACTION)
91                                 err = tcf_action_dump_1(skb, a, 0, 1);
92                         else
93                                 err = tcf_action_dump_1(skb, a, 0, 0);
94                         if (err < 0) {
95                                 index--;
96                                 skb_trim(skb, (u8*)r - skb->data);
97                                 goto done;
98                         }
99                         r->rta_len = skb->tail - (u8*)r;
100                         n_i++;
101                 }
102         }
103 done:
104         read_unlock(&police_lock);
105         if (n_i)
106                 cb->args[0] += n_i;
107         return n_i;
108
109 rtattr_failure:
110         skb_trim(skb, (u8*)r - skb->data);
111         goto done;
112 }
113
114 static inline int
115 tcf_act_police_hash_search(struct tc_action *a, u32 index)
116 {
117         struct tcf_police *p = tcf_police_lookup(index);
118
119         if (p != NULL) {
120                 a->priv = p;
121                 return 1;
122         } else {
123                 return 0;
124         }
125 }
126 #endif
127
128 static inline u32 tcf_police_new_index(void)
129 {
130         do {
131                 if (++idx_gen == 0)
132                         idx_gen = 1;
133         } while (tcf_police_lookup(idx_gen));
134
135         return idx_gen;
136 }
137
138 void tcf_police_destroy(struct tcf_police *p)
139 {
140         unsigned h = tcf_police_hash(p->index);
141         struct tcf_police **p1p;
142         
143         for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
144                 if (*p1p == p) {
145                         write_lock_bh(&police_lock);
146                         *p1p = p->next;
147                         write_unlock_bh(&police_lock);
148 #ifdef CONFIG_NET_ESTIMATOR
149                         gen_kill_estimator(&p->bstats, &p->rate_est);
150 #endif
151                         if (p->R_tab)
152                                 qdisc_put_rtab(p->R_tab);
153                         if (p->P_tab)
154                                 qdisc_put_rtab(p->P_tab);
155                         kfree(p);
156                         return;
157                 }
158         }
159         BUG_TRAP(0);
160 }
161
162 #ifdef CONFIG_NET_CLS_ACT
163 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
164                                  struct tc_action *a, int ovr, int bind)
165 {
166         unsigned h;
167         int ret = 0, err;
168         struct rtattr *tb[TCA_POLICE_MAX];
169         struct tc_police *parm;
170         struct tcf_police *p;
171         struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
172
173         if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
174                 return -EINVAL;
175
176         if (tb[TCA_POLICE_TBF-1] == NULL ||
177             RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
178                 return -EINVAL;
179         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
180
181         if (tb[TCA_POLICE_RESULT-1] != NULL &&
182             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
183                 return -EINVAL;
184         if (tb[TCA_POLICE_RESULT-1] != NULL &&
185             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
186                 return -EINVAL;
187
188         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
189                 a->priv = p;
190                 if (bind) {
191                         p->bindcnt += 1;
192                         p->refcnt += 1;
193                 }
194                 if (ovr)
195                         goto override;
196                 return ret;
197         }
198
199         p = kmalloc(sizeof(*p), GFP_KERNEL);
200         if (p == NULL)
201                 return -ENOMEM;
202         memset(p, 0, sizeof(*p));
203
204         ret = ACT_P_CREATED;
205         p->refcnt = 1;
206         spin_lock_init(&p->lock);
207         p->stats_lock = &p->lock;
208         if (bind)
209                 p->bindcnt = 1;
210 override:
211         if (parm->rate.rate) {
212                 err = -ENOMEM;
213                 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
214                 if (R_tab == NULL)
215                         goto failure;
216                 if (parm->peakrate.rate) {
217                         P_tab = qdisc_get_rtab(&parm->peakrate,
218                                                tb[TCA_POLICE_PEAKRATE-1]);
219                         if (p->P_tab == NULL) {
220                                 qdisc_put_rtab(R_tab);
221                                 goto failure;
222                         }
223                 }
224         }
225         /* No failure allowed after this point */
226         spin_lock_bh(&p->lock);
227         if (R_tab != NULL) {
228                 qdisc_put_rtab(p->R_tab);
229                 p->R_tab = R_tab;
230         }
231         if (P_tab != NULL) {
232                 qdisc_put_rtab(p->P_tab);
233                 p->P_tab = P_tab;
234         }
235
236         if (tb[TCA_POLICE_RESULT-1])
237                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
238         p->toks = p->burst = parm->burst;
239         p->mtu = parm->mtu;
240         if (p->mtu == 0) {
241                 p->mtu = ~0;
242                 if (p->R_tab)
243                         p->mtu = 255<<p->R_tab->rate.cell_log;
244         }
245         if (p->P_tab)
246                 p->ptoks = L2T_P(p, p->mtu);
247         p->action = parm->action;
248
249 #ifdef CONFIG_NET_ESTIMATOR
250         if (tb[TCA_POLICE_AVRATE-1])
251                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
252         if (est)
253                 gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
254 #endif
255
256         spin_unlock_bh(&p->lock);
257         if (ret != ACT_P_CREATED)
258                 return ret;
259
260         PSCHED_GET_TIME(p->t_c);
261         p->index = parm->index ? : tcf_police_new_index();
262         h = tcf_police_hash(p->index);
263         write_lock_bh(&police_lock);
264         p->next = tcf_police_ht[h];
265         tcf_police_ht[h] = p;
266         write_unlock_bh(&police_lock);
267
268         a->priv = p;
269         return ret;
270
271 failure:
272         if (ret == ACT_P_CREATED)
273                 kfree(p);
274         return err;
275 }
276
277 static int tcf_act_police_cleanup(struct tc_action *a, int bind)
278 {
279         struct tcf_police *p = PRIV(a);
280
281         if (p != NULL)
282                 return tcf_police_release(p, bind);
283         return 0;
284 }
285
286 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
287                           struct tcf_result *res)
288 {
289         psched_time_t now;
290         struct tcf_police *p = PRIV(a);
291         long toks;
292         long ptoks = 0;
293
294         spin_lock(&p->lock);
295
296         p->bstats.bytes += skb->len;
297         p->bstats.packets++;
298
299 #ifdef CONFIG_NET_ESTIMATOR
300         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
301                 p->qstats.overlimits++;
302                 spin_unlock(&p->lock);
303                 return p->action;
304         }
305 #endif
306
307         if (skb->len <= p->mtu) {
308                 if (p->R_tab == NULL) {
309                         spin_unlock(&p->lock);
310                         return p->result;
311                 }
312
313                 PSCHED_GET_TIME(now);
314
315                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
316
317                 if (p->P_tab) {
318                         ptoks = toks + p->ptoks;
319                         if (ptoks > (long)L2T_P(p, p->mtu))
320                                 ptoks = (long)L2T_P(p, p->mtu);
321                         ptoks -= L2T_P(p, skb->len);
322                 }
323                 toks += p->toks;
324                 if (toks > (long)p->burst)
325                         toks = p->burst;
326                 toks -= L2T(p, skb->len);
327
328                 if ((toks|ptoks) >= 0) {
329                         p->t_c = now;
330                         p->toks = toks;
331                         p->ptoks = ptoks;
332                         spin_unlock(&p->lock);
333                         return p->result;
334                 }
335         }
336
337         p->qstats.overlimits++;
338         spin_unlock(&p->lock);
339         return p->action;
340 }
341
342 static int
343 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
344 {
345         unsigned char    *b = skb->tail;
346         struct tc_police opt;
347         struct tcf_police *p = PRIV(a);
348
349         opt.index = p->index;
350         opt.action = p->action;
351         opt.mtu = p->mtu;
352         opt.burst = p->burst;
353         opt.refcnt = p->refcnt - ref;
354         opt.bindcnt = p->bindcnt - bind;
355         if (p->R_tab)
356                 opt.rate = p->R_tab->rate;
357         else
358                 memset(&opt.rate, 0, sizeof(opt.rate));
359         if (p->P_tab)
360                 opt.peakrate = p->P_tab->rate;
361         else
362                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
363         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
364         if (p->result)
365                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
366 #ifdef CONFIG_NET_ESTIMATOR
367         if (p->ewma_rate)
368                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
369 #endif
370         return skb->len;
371
372 rtattr_failure:
373         skb_trim(skb, b - skb->data);
374         return -1;
375 }
376
377 MODULE_AUTHOR("Alexey Kuznetsov");
378 MODULE_DESCRIPTION("Policing actions");
379 MODULE_LICENSE("GPL");
380
381 static struct tc_action_ops act_police_ops = {
382         .kind           =       "police",
383         .type           =       TCA_ID_POLICE,
384         .capab          =       TCA_CAP_NONE,
385         .owner          =       THIS_MODULE,
386         .act            =       tcf_act_police,
387         .dump           =       tcf_act_police_dump,
388         .cleanup        =       tcf_act_police_cleanup,
389         .lookup         =       tcf_act_police_hash_search,
390         .init           =       tcf_act_police_locate,
391         .walk           =       tcf_act_police_walker
392 };
393
394 static int __init
395 police_init_module(void)
396 {
397         return tcf_register_action(&act_police_ops);
398 }
399
400 static void __exit
401 police_cleanup_module(void)
402 {
403         tcf_unregister_action(&act_police_ops);
404 }
405
406 module_init(police_init_module);
407 module_exit(police_cleanup_module);
408
409 #else /* CONFIG_NET_CLS_ACT */
410
411 struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
412 {
413         unsigned h;
414         struct tcf_police *p;
415         struct rtattr *tb[TCA_POLICE_MAX];
416         struct tc_police *parm;
417
418         if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
419                 return NULL;
420
421         if (tb[TCA_POLICE_TBF-1] == NULL ||
422             RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
423                 return NULL;
424
425         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
426
427         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
428                 p->refcnt++;
429                 return p;
430         }
431
432         p = kmalloc(sizeof(*p), GFP_KERNEL);
433         if (p == NULL)
434                 return NULL;
435
436         memset(p, 0, sizeof(*p));
437         p->refcnt = 1;
438         spin_lock_init(&p->lock);
439         p->stats_lock = &p->lock;
440         if (parm->rate.rate) {
441                 p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
442                 if (p->R_tab == NULL)
443                         goto failure;
444                 if (parm->peakrate.rate) {
445                         p->P_tab = qdisc_get_rtab(&parm->peakrate,
446                                                   tb[TCA_POLICE_PEAKRATE-1]);
447                         if (p->P_tab == NULL)
448                                 goto failure;
449                 }
450         }
451         if (tb[TCA_POLICE_RESULT-1]) {
452                 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
453                         goto failure;
454                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
455         }
456 #ifdef CONFIG_NET_ESTIMATOR
457         if (tb[TCA_POLICE_AVRATE-1]) {
458                 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
459                         goto failure;
460                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
461         }
462 #endif
463         p->toks = p->burst = parm->burst;
464         p->mtu = parm->mtu;
465         if (p->mtu == 0) {
466                 p->mtu = ~0;
467                 if (p->R_tab)
468                         p->mtu = 255<<p->R_tab->rate.cell_log;
469         }
470         if (p->P_tab)
471                 p->ptoks = L2T_P(p, p->mtu);
472         PSCHED_GET_TIME(p->t_c);
473         p->index = parm->index ? : tcf_police_new_index();
474         p->action = parm->action;
475 #ifdef CONFIG_NET_ESTIMATOR
476         if (est)
477                 gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
478 #endif
479         h = tcf_police_hash(p->index);
480         write_lock_bh(&police_lock);
481         p->next = tcf_police_ht[h];
482         tcf_police_ht[h] = p;
483         write_unlock_bh(&police_lock);
484         return p;
485
486 failure:
487         if (p->R_tab)
488                 qdisc_put_rtab(p->R_tab);
489         kfree(p);
490         return NULL;
491 }
492
493 int tcf_police(struct sk_buff *skb, struct tcf_police *p)
494 {
495         psched_time_t now;
496         long toks;
497         long ptoks = 0;
498
499         spin_lock(&p->lock);
500
501         p->bstats.bytes += skb->len;
502         p->bstats.packets++;
503
504 #ifdef CONFIG_NET_ESTIMATOR
505         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
506                 p->qstats.overlimits++;
507                 spin_unlock(&p->lock);
508                 return p->action;
509         }
510 #endif
511
512         if (skb->len <= p->mtu) {
513                 if (p->R_tab == NULL) {
514                         spin_unlock(&p->lock);
515                         return p->result;
516                 }
517
518                 PSCHED_GET_TIME(now);
519
520                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
521
522                 if (p->P_tab) {
523                         ptoks = toks + p->ptoks;
524                         if (ptoks > (long)L2T_P(p, p->mtu))
525                                 ptoks = (long)L2T_P(p, p->mtu);
526                         ptoks -= L2T_P(p, skb->len);
527                 }
528                 toks += p->toks;
529                 if (toks > (long)p->burst)
530                         toks = p->burst;
531                 toks -= L2T(p, skb->len);
532
533                 if ((toks|ptoks) >= 0) {
534                         p->t_c = now;
535                         p->toks = toks;
536                         p->ptoks = ptoks;
537                         spin_unlock(&p->lock);
538                         return p->result;
539                 }
540         }
541
542         p->qstats.overlimits++;
543         spin_unlock(&p->lock);
544         return p->action;
545 }
546 EXPORT_SYMBOL(tcf_police);
547
548 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
549 {
550         unsigned char    *b = skb->tail;
551         struct tc_police opt;
552
553         opt.index = p->index;
554         opt.action = p->action;
555         opt.mtu = p->mtu;
556         opt.burst = p->burst;
557         if (p->R_tab)
558                 opt.rate = p->R_tab->rate;
559         else
560                 memset(&opt.rate, 0, sizeof(opt.rate));
561         if (p->P_tab)
562                 opt.peakrate = p->P_tab->rate;
563         else
564                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
565         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
566         if (p->result)
567                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
568 #ifdef CONFIG_NET_ESTIMATOR
569         if (p->ewma_rate)
570                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
571 #endif
572         return skb->len;
573
574 rtattr_failure:
575         skb_trim(skb, b - skb->data);
576         return -1;
577 }
578
579 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
580 {
581         struct gnet_dump d;
582         
583         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
584                         TCA_XSTATS, p->stats_lock, &d) < 0)
585                 goto errout;
586         
587         if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
588 #ifdef CONFIG_NET_ESTIMATOR
589             gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
590 #endif
591             gnet_stats_copy_queue(&d, &p->qstats) < 0)
592                 goto errout;
593
594         if (gnet_stats_finish_copy(&d) < 0)
595                 goto errout;
596
597         return 0;
598
599 errout:
600         return -1;
601 }
602
603 #endif /* CONFIG_NET_CLS_ACT */