Merge tag 'for-4.21/block-20190102' of git://git.kernel.dk/linux-block
[sfrench/cifs-2.6.git] / net / sched / cls_api.c
1 /*
2  * net/sched/cls_api.c  Packet classifier API.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  *
13  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
14  *
15  */
16
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/err.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/kmod.h>
26 #include <linux/slab.h>
27 #include <linux/idr.h>
28 #include <linux/rhashtable.h>
29 #include <net/net_namespace.h>
30 #include <net/sock.h>
31 #include <net/netlink.h>
32 #include <net/pkt_sched.h>
33 #include <net/pkt_cls.h>
34
/* Netlink attribute policy table with TCA_MAX + 1 entries. Only declared
 * here; its definition is not visible in this part of the file.
 */
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];

/* The list of all installed classifier types. Entries are struct
 * tcf_proto_ops linked through their head member.
 */
static LIST_HEAD(tcf_proto_base);

/* Protects list of registered TC modules. It is pure SMP lock.
 * Lookups take it for reading, register/unregister take it for writing.
 */
static DEFINE_RWLOCK(cls_mod_lock);
42
43 /* Find classifier type by string name */
44
45 static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
46 {
47         const struct tcf_proto_ops *t, *res = NULL;
48
49         if (kind) {
50                 read_lock(&cls_mod_lock);
51                 list_for_each_entry(t, &tcf_proto_base, head) {
52                         if (strcmp(kind, t->kind) == 0) {
53                                 if (try_module_get(t->owner))
54                                         res = t;
55                                 break;
56                         }
57                 }
58                 read_unlock(&cls_mod_lock);
59         }
60         return res;
61 }
62
63 static const struct tcf_proto_ops *
64 tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
65 {
66         const struct tcf_proto_ops *ops;
67
68         ops = __tcf_proto_lookup_ops(kind);
69         if (ops)
70                 return ops;
71 #ifdef CONFIG_MODULES
72         rtnl_unlock();
73         request_module("cls_%s", kind);
74         rtnl_lock();
75         ops = __tcf_proto_lookup_ops(kind);
76         /* We dropped the RTNL semaphore in order to perform
77          * the module load. So, even if we succeeded in loading
78          * the module we have to replay the request. We indicate
79          * this using -EAGAIN.
80          */
81         if (ops) {
82                 module_put(ops->owner);
83                 return ERR_PTR(-EAGAIN);
84         }
85 #endif
86         NL_SET_ERR_MSG(extack, "TC classifier not found");
87         return ERR_PTR(-ENOENT);
88 }
89
90 /* Register(unregister) new classifier type */
91
92 int register_tcf_proto_ops(struct tcf_proto_ops *ops)
93 {
94         struct tcf_proto_ops *t;
95         int rc = -EEXIST;
96
97         write_lock(&cls_mod_lock);
98         list_for_each_entry(t, &tcf_proto_base, head)
99                 if (!strcmp(ops->kind, t->kind))
100                         goto out;
101
102         list_add_tail(&ops->head, &tcf_proto_base);
103         rc = 0;
104 out:
105         write_unlock(&cls_mod_lock);
106         return rc;
107 }
108 EXPORT_SYMBOL(register_tcf_proto_ops);
109
/* Workqueue that tcf_queue_work() queues RCU work on; it is flushed by
 * unregister_tcf_proto_ops() before a classifier type is removed.
 */
static struct workqueue_struct *tc_filter_wq;
111
112 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
113 {
114         struct tcf_proto_ops *t;
115         int rc = -ENOENT;
116
117         /* Wait for outstanding call_rcu()s, if any, from a
118          * tcf_proto_ops's destroy() handler.
119          */
120         rcu_barrier();
121         flush_workqueue(tc_filter_wq);
122
123         write_lock(&cls_mod_lock);
124         list_for_each_entry(t, &tcf_proto_base, head) {
125                 if (t == ops) {
126                         list_del(&t->head);
127                         rc = 0;
128                         break;
129                 }
130         }
131         write_unlock(&cls_mod_lock);
132         return rc;
133 }
134 EXPORT_SYMBOL(unregister_tcf_proto_ops);
135
136 bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
137 {
138         INIT_RCU_WORK(rwork, func);
139         return queue_rcu_work(tc_filter_wq, rwork);
140 }
141 EXPORT_SYMBOL(tcf_queue_work);
142
143 /* Select new prio value from the range, managed by kernel. */
144
145 static inline u32 tcf_auto_prio(struct tcf_proto *tp)
146 {
147         u32 first = TC_H_MAKE(0xC0000000U, 0U);
148
149         if (tp)
150                 first = tp->prio - 1;
151
152         return TC_H_MAJ(first);
153 }
154
155 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
156                                           u32 prio, struct tcf_chain *chain,
157                                           struct netlink_ext_ack *extack)
158 {
159         struct tcf_proto *tp;
160         int err;
161
162         tp = kzalloc(sizeof(*tp), GFP_KERNEL);
163         if (!tp)
164                 return ERR_PTR(-ENOBUFS);
165
166         tp->ops = tcf_proto_lookup_ops(kind, extack);
167         if (IS_ERR(tp->ops)) {
168                 err = PTR_ERR(tp->ops);
169                 goto errout;
170         }
171         tp->classify = tp->ops->classify;
172         tp->protocol = protocol;
173         tp->prio = prio;
174         tp->chain = chain;
175
176         err = tp->ops->init(tp);
177         if (err) {
178                 module_put(tp->ops->owner);
179                 goto errout;
180         }
181         return tp;
182
183 errout:
184         kfree(tp);
185         return ERR_PTR(err);
186 }
187
188 static void tcf_proto_destroy(struct tcf_proto *tp,
189                               struct netlink_ext_ack *extack)
190 {
191         tp->ops->destroy(tp, extack);
192         module_put(tp->ops->owner);
193         kfree_rcu(tp, rcu);
194 }
195
/* One registered "chain 0 head changed" callback of a block. */
struct tcf_filter_chain_list_item {
	struct list_head list;	/* linkage in block->chain0.filter_chain_list */
	tcf_chain_head_change_t *chain_head_change; /* callback, may be NULL */
	void *chain_head_change_priv;	/* passed as second callback argument */
};
201
202 static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
203                                           u32 chain_index)
204 {
205         struct tcf_chain *chain;
206
207         chain = kzalloc(sizeof(*chain), GFP_KERNEL);
208         if (!chain)
209                 return NULL;
210         list_add_tail(&chain->list, &block->chain_list);
211         chain->block = block;
212         chain->index = chain_index;
213         chain->refcnt = 1;
214         if (!chain->index)
215                 block->chain0.chain = chain;
216         return chain;
217 }
218
219 static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
220                                        struct tcf_proto *tp_head)
221 {
222         if (item->chain_head_change)
223                 item->chain_head_change(tp_head, item->chain_head_change_priv);
224 }
225
226 static void tcf_chain0_head_change(struct tcf_chain *chain,
227                                    struct tcf_proto *tp_head)
228 {
229         struct tcf_filter_chain_list_item *item;
230         struct tcf_block *block = chain->block;
231
232         if (chain->index)
233                 return;
234         list_for_each_entry(item, &block->chain0.filter_chain_list, list)
235                 tcf_chain_head_change_item(item, tp_head);
236 }
237
238 static void tcf_chain_destroy(struct tcf_chain *chain)
239 {
240         struct tcf_block *block = chain->block;
241
242         list_del(&chain->list);
243         if (!chain->index)
244                 block->chain0.chain = NULL;
245         kfree(chain);
246         if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt))
247                 kfree_rcu(block, rcu);
248 }
249
250 static void tcf_chain_hold(struct tcf_chain *chain)
251 {
252         ++chain->refcnt;
253 }
254
255 static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
256 {
257         /* In case all the references are action references, this
258          * chain should not be shown to the user.
259          */
260         return chain->refcnt == chain->action_refcnt;
261 }
262
263 static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
264                                           u32 chain_index)
265 {
266         struct tcf_chain *chain;
267
268         list_for_each_entry(chain, &block->chain_list, list) {
269                 if (chain->index == chain_index)
270                         return chain;
271         }
272         return NULL;
273 }
274
275 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
276                            u32 seq, u16 flags, int event, bool unicast);
277
278 static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
279                                          u32 chain_index, bool create,
280                                          bool by_act)
281 {
282         struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
283
284         if (chain) {
285                 tcf_chain_hold(chain);
286         } else {
287                 if (!create)
288                         return NULL;
289                 chain = tcf_chain_create(block, chain_index);
290                 if (!chain)
291                         return NULL;
292         }
293
294         if (by_act)
295                 ++chain->action_refcnt;
296
297         /* Send notification only in case we got the first
298          * non-action reference. Until then, the chain acts only as
299          * a placeholder for actions pointing to it and user ought
300          * not know about them.
301          */
302         if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
303                 tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
304                                 RTM_NEWCHAIN, false);
305
306         return chain;
307 }
308
309 static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
310                                        bool create)
311 {
312         return __tcf_chain_get(block, chain_index, create, false);
313 }
314
315 struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
316 {
317         return __tcf_chain_get(block, chain_index, true, true);
318 }
319 EXPORT_SYMBOL(tcf_chain_get_by_act);
320
321 static void tc_chain_tmplt_del(struct tcf_chain *chain);
322
323 static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
324 {
325         if (by_act)
326                 chain->action_refcnt--;
327         chain->refcnt--;
328
329         /* The last dropped non-action reference will trigger notification. */
330         if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
331                 tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
332
333         if (chain->refcnt == 0) {
334                 tc_chain_tmplt_del(chain);
335                 tcf_chain_destroy(chain);
336         }
337 }
338
339 static void tcf_chain_put(struct tcf_chain *chain)
340 {
341         __tcf_chain_put(chain, false);
342 }
343
344 void tcf_chain_put_by_act(struct tcf_chain *chain)
345 {
346         __tcf_chain_put(chain, true);
347 }
348 EXPORT_SYMBOL(tcf_chain_put_by_act);
349
350 static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
351 {
352         if (chain->explicitly_created)
353                 tcf_chain_put(chain);
354 }
355
356 static void tcf_chain_flush(struct tcf_chain *chain)
357 {
358         struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
359
360         tcf_chain0_head_change(chain, NULL);
361         while (tp) {
362                 RCU_INIT_POINTER(chain->filter_chain, tp->next);
363                 tcf_proto_destroy(tp, NULL);
364                 tp = rtnl_dereference(chain->filter_chain);
365                 tcf_chain_put(chain);
366         }
367 }
368
369 static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
370 {
371         const struct Qdisc_class_ops *cops;
372         struct Qdisc *qdisc;
373
374         if (!dev_ingress_queue(dev))
375                 return NULL;
376
377         qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
378         if (!qdisc)
379                 return NULL;
380
381         cops = qdisc->ops->cl_ops;
382         if (!cops)
383                 return NULL;
384
385         if (!cops->tcf_block)
386                 return NULL;
387
388         return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
389 }
390
/* Hash table of struct tc_indr_block_dev entries, one per net_device that
 * has indirect block callbacks registered.
 */
static struct rhashtable indr_setup_block_ht;
392
/* Per-device state for indirect block callbacks. */
struct tc_indr_block_dev {
	struct rhash_head ht_node;	/* linkage in indr_setup_block_ht */
	struct net_device *dev;		/* hash key */
	unsigned int refcnt;		/* entry is freed when this drops to 0 */
	struct list_head cb_list;	/* list of struct tc_indr_block_cb */
	struct tcf_block *block;	/* block last bound for dev, or NULL */
};
400
/* One indirect block callback registered for a device. */
struct tc_indr_block_cb {
	struct list_head list;		/* linkage in tc_indr_block_dev.cb_list */
	void *cb_priv;			/* passed to cb as its second argument */
	tc_indr_block_bind_cb_t *cb;	/* callback invoked with TC_SETUP_BLOCK */
	void *cb_ident;			/* with cb, identifies this entry */
};
407
/* Parameters of indr_setup_block_ht: entries are keyed by the net_device
 * pointer stored in tc_indr_block_dev.dev.
 */
static const struct rhashtable_params tc_indr_setup_block_ht_params = {
	.key_offset	= offsetof(struct tc_indr_block_dev, dev),
	.head_offset	= offsetof(struct tc_indr_block_dev, ht_node),
	.key_len	= sizeof(struct net_device *),
};
413
414 static struct tc_indr_block_dev *
415 tc_indr_block_dev_lookup(struct net_device *dev)
416 {
417         return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
418                                       tc_indr_setup_block_ht_params);
419 }
420
421 static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
422 {
423         struct tc_indr_block_dev *indr_dev;
424
425         indr_dev = tc_indr_block_dev_lookup(dev);
426         if (indr_dev)
427                 goto inc_ref;
428
429         indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
430         if (!indr_dev)
431                 return NULL;
432
433         INIT_LIST_HEAD(&indr_dev->cb_list);
434         indr_dev->dev = dev;
435         indr_dev->block = tc_dev_ingress_block(dev);
436         if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
437                                    tc_indr_setup_block_ht_params)) {
438                 kfree(indr_dev);
439                 return NULL;
440         }
441
442 inc_ref:
443         indr_dev->refcnt++;
444         return indr_dev;
445 }
446
447 static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
448 {
449         if (--indr_dev->refcnt)
450                 return;
451
452         rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
453                                tc_indr_setup_block_ht_params);
454         kfree(indr_dev);
455 }
456
457 static struct tc_indr_block_cb *
458 tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
459                         tc_indr_block_bind_cb_t *cb, void *cb_ident)
460 {
461         struct tc_indr_block_cb *indr_block_cb;
462
463         list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
464                 if (indr_block_cb->cb == cb &&
465                     indr_block_cb->cb_ident == cb_ident)
466                         return indr_block_cb;
467         return NULL;
468 }
469
470 static struct tc_indr_block_cb *
471 tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
472                      tc_indr_block_bind_cb_t *cb, void *cb_ident)
473 {
474         struct tc_indr_block_cb *indr_block_cb;
475
476         indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
477         if (indr_block_cb)
478                 return ERR_PTR(-EEXIST);
479
480         indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
481         if (!indr_block_cb)
482                 return ERR_PTR(-ENOMEM);
483
484         indr_block_cb->cb_priv = cb_priv;
485         indr_block_cb->cb = cb;
486         indr_block_cb->cb_ident = cb_ident;
487         list_add(&indr_block_cb->list, &indr_dev->cb_list);
488
489         return indr_block_cb;
490 }
491
492 static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
493 {
494         list_del(&indr_block_cb->list);
495         kfree(indr_block_cb);
496 }
497
498 static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
499                                   struct tc_indr_block_cb *indr_block_cb,
500                                   enum tc_block_command command)
501 {
502         struct tc_block_offload bo = {
503                 .command        = command,
504                 .binder_type    = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
505                 .block          = indr_dev->block,
506         };
507
508         if (!indr_dev->block)
509                 return;
510
511         indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
512                           &bo);
513 }
514
515 int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
516                                 tc_indr_block_bind_cb_t *cb, void *cb_ident)
517 {
518         struct tc_indr_block_cb *indr_block_cb;
519         struct tc_indr_block_dev *indr_dev;
520         int err;
521
522         indr_dev = tc_indr_block_dev_get(dev);
523         if (!indr_dev)
524                 return -ENOMEM;
525
526         indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
527         err = PTR_ERR_OR_ZERO(indr_block_cb);
528         if (err)
529                 goto err_dev_put;
530
531         tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_BIND);
532         return 0;
533
534 err_dev_put:
535         tc_indr_block_dev_put(indr_dev);
536         return err;
537 }
538 EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);
539
540 int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
541                               tc_indr_block_bind_cb_t *cb, void *cb_ident)
542 {
543         int err;
544
545         rtnl_lock();
546         err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
547         rtnl_unlock();
548
549         return err;
550 }
551 EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
552
553 void __tc_indr_block_cb_unregister(struct net_device *dev,
554                                    tc_indr_block_bind_cb_t *cb, void *cb_ident)
555 {
556         struct tc_indr_block_cb *indr_block_cb;
557         struct tc_indr_block_dev *indr_dev;
558
559         indr_dev = tc_indr_block_dev_lookup(dev);
560         if (!indr_dev)
561                 return;
562
563         indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
564         if (!indr_block_cb)
565                 return;
566
567         /* Send unbind message if required to free any block cbs. */
568         tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_UNBIND);
569         tc_indr_block_cb_del(indr_block_cb);
570         tc_indr_block_dev_put(indr_dev);
571 }
572 EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);
573
574 void tc_indr_block_cb_unregister(struct net_device *dev,
575                                  tc_indr_block_bind_cb_t *cb, void *cb_ident)
576 {
577         rtnl_lock();
578         __tc_indr_block_cb_unregister(dev, cb, cb_ident);
579         rtnl_unlock();
580 }
581 EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
582
583 static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
584                                struct tcf_block_ext_info *ei,
585                                enum tc_block_command command,
586                                struct netlink_ext_ack *extack)
587 {
588         struct tc_indr_block_cb *indr_block_cb;
589         struct tc_indr_block_dev *indr_dev;
590         struct tc_block_offload bo = {
591                 .command        = command,
592                 .binder_type    = ei->binder_type,
593                 .block          = block,
594                 .extack         = extack,
595         };
596
597         indr_dev = tc_indr_block_dev_lookup(dev);
598         if (!indr_dev)
599                 return;
600
601         indr_dev->block = command == TC_BLOCK_BIND ? block : NULL;
602
603         list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
604                 indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
605                                   &bo);
606 }
607
608 static bool tcf_block_offload_in_use(struct tcf_block *block)
609 {
610         return block->offloadcnt;
611 }
612
613 static int tcf_block_offload_cmd(struct tcf_block *block,
614                                  struct net_device *dev,
615                                  struct tcf_block_ext_info *ei,
616                                  enum tc_block_command command,
617                                  struct netlink_ext_ack *extack)
618 {
619         struct tc_block_offload bo = {};
620
621         bo.command = command;
622         bo.binder_type = ei->binder_type;
623         bo.block = block;
624         bo.extack = extack;
625         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
626 }
627
628 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
629                                   struct tcf_block_ext_info *ei,
630                                   struct netlink_ext_ack *extack)
631 {
632         struct net_device *dev = q->dev_queue->dev;
633         int err;
634
635         if (!dev->netdev_ops->ndo_setup_tc)
636                 goto no_offload_dev_inc;
637
638         /* If tc offload feature is disabled and the block we try to bind
639          * to already has some offloaded filters, forbid to bind.
640          */
641         if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
642                 NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
643                 return -EOPNOTSUPP;
644         }
645
646         err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
647         if (err == -EOPNOTSUPP)
648                 goto no_offload_dev_inc;
649         if (err)
650                 return err;
651
652         tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
653         return 0;
654
655 no_offload_dev_inc:
656         if (tcf_block_offload_in_use(block))
657                 return -EOPNOTSUPP;
658         block->nooffloaddevcnt++;
659         tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
660         return 0;
661 }
662
663 static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
664                                      struct tcf_block_ext_info *ei)
665 {
666         struct net_device *dev = q->dev_queue->dev;
667         int err;
668
669         tc_indr_block_call(block, dev, ei, TC_BLOCK_UNBIND, NULL);
670
671         if (!dev->netdev_ops->ndo_setup_tc)
672                 goto no_offload_dev_dec;
673         err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
674         if (err == -EOPNOTSUPP)
675                 goto no_offload_dev_dec;
676         return;
677
678 no_offload_dev_dec:
679         WARN_ON(block->nooffloaddevcnt-- == 0);
680 }
681
682 static int
683 tcf_chain0_head_change_cb_add(struct tcf_block *block,
684                               struct tcf_block_ext_info *ei,
685                               struct netlink_ext_ack *extack)
686 {
687         struct tcf_chain *chain0 = block->chain0.chain;
688         struct tcf_filter_chain_list_item *item;
689
690         item = kmalloc(sizeof(*item), GFP_KERNEL);
691         if (!item) {
692                 NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
693                 return -ENOMEM;
694         }
695         item->chain_head_change = ei->chain_head_change;
696         item->chain_head_change_priv = ei->chain_head_change_priv;
697         if (chain0 && chain0->filter_chain)
698                 tcf_chain_head_change_item(item, chain0->filter_chain);
699         list_add(&item->list, &block->chain0.filter_chain_list);
700         return 0;
701 }
702
703 static void
704 tcf_chain0_head_change_cb_del(struct tcf_block *block,
705                               struct tcf_block_ext_info *ei)
706 {
707         struct tcf_chain *chain0 = block->chain0.chain;
708         struct tcf_filter_chain_list_item *item;
709
710         list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
711                 if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
712                     (item->chain_head_change == ei->chain_head_change &&
713                      item->chain_head_change_priv == ei->chain_head_change_priv)) {
714                         if (chain0)
715                                 tcf_chain_head_change_item(item, NULL);
716                         list_del(&item->list);
717                         kfree(item);
718                         return;
719                 }
720         }
721         WARN_ON(1);
722 }
723
/* Per-network-namespace state, obtained via net_generic(net, tcf_net_id). */
struct tcf_net {
	spinlock_t idr_lock; /* Protects idr */
	struct idr idr; /* maps a block index to its struct tcf_block */
};
728
/* Id passed to net_generic() to fetch this file's struct tcf_net. */
static unsigned int tcf_net_id;
730
731 static int tcf_block_insert(struct tcf_block *block, struct net *net,
732                             struct netlink_ext_ack *extack)
733 {
734         struct tcf_net *tn = net_generic(net, tcf_net_id);
735         int err;
736
737         idr_preload(GFP_KERNEL);
738         spin_lock(&tn->idr_lock);
739         err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
740                             GFP_NOWAIT);
741         spin_unlock(&tn->idr_lock);
742         idr_preload_end();
743
744         return err;
745 }
746
747 static void tcf_block_remove(struct tcf_block *block, struct net *net)
748 {
749         struct tcf_net *tn = net_generic(net, tcf_net_id);
750
751         spin_lock(&tn->idr_lock);
752         idr_remove(&tn->idr, block->index);
753         spin_unlock(&tn->idr_lock);
754 }
755
756 static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
757                                           u32 block_index,
758                                           struct netlink_ext_ack *extack)
759 {
760         struct tcf_block *block;
761
762         block = kzalloc(sizeof(*block), GFP_KERNEL);
763         if (!block) {
764                 NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
765                 return ERR_PTR(-ENOMEM);
766         }
767         INIT_LIST_HEAD(&block->chain_list);
768         INIT_LIST_HEAD(&block->cb_list);
769         INIT_LIST_HEAD(&block->owner_list);
770         INIT_LIST_HEAD(&block->chain0.filter_chain_list);
771
772         refcount_set(&block->refcnt, 1);
773         block->net = net;
774         block->index = block_index;
775
776         /* Don't store q pointer for blocks which are shared */
777         if (!tcf_block_shared(block))
778                 block->q = q;
779         return block;
780 }
781
782 static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
783 {
784         struct tcf_net *tn = net_generic(net, tcf_net_id);
785
786         return idr_find(&tn->idr, block_index);
787 }
788
789 static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
790 {
791         struct tcf_block *block;
792
793         rcu_read_lock();
794         block = tcf_block_lookup(net, block_index);
795         if (block && !refcount_inc_not_zero(&block->refcnt))
796                 block = NULL;
797         rcu_read_unlock();
798
799         return block;
800 }
801
802 static void tcf_block_flush_all_chains(struct tcf_block *block)
803 {
804         struct tcf_chain *chain;
805
806         /* Hold a refcnt for all chains, so that they don't disappear
807          * while we are iterating.
808          */
809         list_for_each_entry(chain, &block->chain_list, list)
810                 tcf_chain_hold(chain);
811
812         list_for_each_entry(chain, &block->chain_list, list)
813                 tcf_chain_flush(chain);
814 }
815
816 static void tcf_block_put_all_chains(struct tcf_block *block)
817 {
818         struct tcf_chain *chain, *tmp;
819
820         /* At this point, all the chains should have refcnt >= 1. */
821         list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
822                 tcf_chain_put_explicitly_created(chain);
823                 tcf_chain_put(chain);
824         }
825 }
826
827 static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
828                             struct tcf_block_ext_info *ei)
829 {
830         if (refcount_dec_and_test(&block->refcnt)) {
831                 /* Flushing/putting all chains will cause the block to be
832                  * deallocated when last chain is freed. However, if chain_list
833                  * is empty, block has to be manually deallocated. After block
834                  * reference counter reached 0, it is no longer possible to
835                  * increment it or add new chains to block.
836                  */
837                 bool free_block = list_empty(&block->chain_list);
838
839                 if (tcf_block_shared(block))
840                         tcf_block_remove(block, block->net);
841                 if (!free_block)
842                         tcf_block_flush_all_chains(block);
843
844                 if (q)
845                         tcf_block_offload_unbind(block, q, ei);
846
847                 if (free_block)
848                         kfree_rcu(block, rcu);
849                 else
850                         tcf_block_put_all_chains(block);
851         } else if (q) {
852                 tcf_block_offload_unbind(block, q, ei);
853         }
854 }
855
856 static void tcf_block_refcnt_put(struct tcf_block *block)
857 {
858         __tcf_block_put(block, NULL, NULL);
859 }
860
/* Find tcf block.
 * Set q, parent, cl when appropriate.
 *
 * If ifindex is TCM_IFINDEX_MAGIC_BLOCK the block is looked up directly by
 * block_index and *q, *parent and *cl are not touched. Otherwise the device
 * is looked up by ifindex, the qdisc is taken from *parent (or the device
 * root qdisc when *parent is 0, in which case *parent is updated to its
 * handle), and the block is obtained from the qdisc's class ops.
 *
 * Returns the block with a reference held, or an ERR_PTR() value. In the
 * qdisc case *q also holds a reference on success. On the error paths that
 * go through the labels at the bottom, *q is put and reset to NULL; the
 * early -ENODEV return leaves *q as the caller passed it.
 */

static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
                                        u32 *parent, unsigned long *cl,
                                        int ifindex, u32 block_index,
                                        struct netlink_ext_ack *extack)
{
        struct tcf_block *block;
        int err = 0;

        if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
                /* Shared block addressed by index, no qdisc involved. */
                block = tcf_block_refcnt_get(net, block_index);
                if (!block) {
                        NL_SET_ERR_MSG(extack, "Block of given index was not found");
                        return ERR_PTR(-EINVAL);
                }
        } else {
                const struct Qdisc_class_ops *cops;
                struct net_device *dev;

                rcu_read_lock();

                /* Find link */
                dev = dev_get_by_index_rcu(net, ifindex);
                if (!dev) {
                        rcu_read_unlock();
                        return ERR_PTR(-ENODEV);
                }

                /* Find qdisc */
                if (!*parent) {
                        *q = dev->qdisc;
                        *parent = (*q)->handle;
                } else {
                        *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
                        if (!*q) {
                                NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
                                err = -EINVAL;
                                goto errout_rcu;
                        }
                }

                /* Take a qdisc reference; the result is NULL when that fails,
                 * so the error path below has nothing to put.
                 */
                *q = qdisc_refcount_inc_nz(*q);
                if (!*q) {
                        NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
                        err = -EINVAL;
                        goto errout_rcu;
                }

                /* Is it classful? */
                cops = (*q)->ops->cl_ops;
                if (!cops) {
                        NL_SET_ERR_MSG(extack, "Qdisc not classful");
                        err = -EINVAL;
                        goto errout_rcu;
                }

                if (!cops->tcf_block) {
                        NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
                        err = -EOPNOTSUPP;
                        goto errout_rcu;
                }

                /* At this point we know that qdisc is not noop_qdisc,
                 * which means that qdisc holds a reference to net_device
                 * and we hold a reference to qdisc, so it is safe to release
                 * rcu read lock.
                 */
                rcu_read_unlock();

                /* Do we search for filter, attached to class? */
                if (TC_H_MIN(*parent)) {
                        *cl = cops->find(*q, *parent);
                        if (*cl == 0) {
                                NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
                                err = -ENOENT;
                                goto errout_qdisc;
                        }
                }

                /* And the last stroke */
                block = cops->tcf_block(*q, *cl, extack);
                if (!block) {
                        err = -EINVAL;
                        goto errout_qdisc;
                }
                /* Shared blocks are only reachable through the block index. */
                if (tcf_block_shared(block)) {
                        NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
                        err = -EOPNOTSUPP;
                        goto errout_qdisc;
                }

                /* Always take reference to block in order to support execution
                 * of rules update path of cls API without rtnl lock. Caller
                 * must release block when it is finished using it. 'if' block
                 * of this conditional obtain reference to block by calling
                 * tcf_block_refcnt_get().
                 */
                refcount_inc(&block->refcnt);
        }

        return block;

        /* errout_rcu: still inside the RCU read-side section.
         * errout_qdisc: RCU already released; drop the qdisc reference, if
         * one is held.
         */
errout_rcu:
        rcu_read_unlock();
errout_qdisc:
        if (*q) {
                qdisc_put(*q);
                *q = NULL;
        }
        return ERR_PTR(err);
}
975
976 static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
977 {
978         if (!IS_ERR_OR_NULL(block))
979                 tcf_block_refcnt_put(block);
980
981         if (q)
982                 qdisc_put(q);
983 }
984
/* Entry on a block's owner_list: one qdisc bound to the block together with
 * the binder type it was bound with.
 */
struct tcf_block_owner_item {
        /* Linkage on block->owner_list. */
        struct list_head list;
        /* Qdisc that owns (is bound to) the block. */
        struct Qdisc *q;
        /* Binder type recorded for this owner. */
        enum tcf_block_binder_type binder_type;
};
990
991 static void
992 tcf_block_owner_netif_keep_dst(struct tcf_block *block,
993                                struct Qdisc *q,
994                                enum tcf_block_binder_type binder_type)
995 {
996         if (block->keep_dst &&
997             binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
998             binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
999                 netif_keep_dst(qdisc_dev(q));
1000 }
1001
1002 void tcf_block_netif_keep_dst(struct tcf_block *block)
1003 {
1004         struct tcf_block_owner_item *item;
1005
1006         block->keep_dst = true;
1007         list_for_each_entry(item, &block->owner_list, list)
1008                 tcf_block_owner_netif_keep_dst(block, item->q,
1009                                                item->binder_type);
1010 }
1011 EXPORT_SYMBOL(tcf_block_netif_keep_dst);
1012
1013 static int tcf_block_owner_add(struct tcf_block *block,
1014                                struct Qdisc *q,
1015                                enum tcf_block_binder_type binder_type)
1016 {
1017         struct tcf_block_owner_item *item;
1018
1019         item = kmalloc(sizeof(*item), GFP_KERNEL);
1020         if (!item)
1021                 return -ENOMEM;
1022         item->q = q;
1023         item->binder_type = binder_type;
1024         list_add(&item->list, &block->owner_list);
1025         return 0;
1026 }
1027
1028 static void tcf_block_owner_del(struct tcf_block *block,
1029                                 struct Qdisc *q,
1030                                 enum tcf_block_binder_type binder_type)
1031 {
1032         struct tcf_block_owner_item *item;
1033
1034         list_for_each_entry(item, &block->owner_list, list) {
1035                 if (item->q == q && item->binder_type == binder_type) {
1036                         list_del(&item->list);
1037                         kfree(item);
1038                         return;
1039                 }
1040         }
1041         WARN_ON(1);
1042 }
1043
/* Obtain a block for qdisc q and bind q to it.
 *
 * A non-zero ei->block_index requests a shared block: an existing one is
 * looked up first, and a new one is created and inserted only if the lookup
 * finds nothing. With block_index 0 a new block is always created.
 * The qdisc is then added as an owner, the chain 0 head-change callback from
 * ei is registered and offload is bound.
 *
 * On success *p_block is set and 0 is returned. On failure the steps already
 * done are undone in reverse order and a negative errno is returned;
 * *p_block is not written.
 */
int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
                      struct tcf_block_ext_info *ei,
                      struct netlink_ext_ack *extack)
{
        struct net *net = qdisc_net(q);
        struct tcf_block *block = NULL;
        int err;

        if (ei->block_index)
                /* block_index not 0 means the shared block is requested */
                block = tcf_block_refcnt_get(net, ei->block_index);

        if (!block) {
                block = tcf_block_create(net, q, ei->block_index, extack);
                if (IS_ERR(block))
                        return PTR_ERR(block);
                /* Only shared blocks are inserted for lookup by index. */
                if (tcf_block_shared(block)) {
                        err = tcf_block_insert(block, net, extack);
                        if (err)
                                goto err_block_insert;
                }
        }

        err = tcf_block_owner_add(block, q, ei->binder_type);
        if (err)
                goto err_block_owner_add;

        /* Apply an already-set keep_dst to the newly added owner. */
        tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

        err = tcf_chain0_head_change_cb_add(block, ei, extack);
        if (err)
                goto err_chain0_head_change_cb_add;

        err = tcf_block_offload_bind(block, q, ei, extack);
        if (err)
                goto err_block_offload_bind;

        *p_block = block;
        return 0;

        /* Unwind in reverse order of setup; every path ends by dropping the
         * block reference.
         */
err_block_offload_bind:
        tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
        tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
        tcf_block_refcnt_put(block);
        return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);
1094
1095 static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
1096 {
1097         struct tcf_proto __rcu **p_filter_chain = priv;
1098
1099         rcu_assign_pointer(*p_filter_chain, tp_head);
1100 }
1101
1102 int tcf_block_get(struct tcf_block **p_block,
1103                   struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
1104                   struct netlink_ext_ack *extack)
1105 {
1106         struct tcf_block_ext_info ei = {
1107                 .chain_head_change = tcf_chain_head_change_dflt,
1108                 .chain_head_change_priv = p_filter_chain,
1109         };
1110
1111         WARN_ON(!p_filter_chain);
1112         return tcf_block_get_ext(p_block, q, &ei, extack);
1113 }
1114 EXPORT_SYMBOL(tcf_block_get);
1115
1116 /* XXX: Standalone actions are not allowed to jump to any chain, and bound
1117  * actions should be all removed after flushing.
1118  */
1119 void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
1120                        struct tcf_block_ext_info *ei)
1121 {
1122         if (!block)
1123                 return;
1124         tcf_chain0_head_change_cb_del(block, ei);
1125         tcf_block_owner_del(block, q, ei->binder_type);
1126
1127         __tcf_block_put(block, q, ei);
1128 }
1129 EXPORT_SYMBOL(tcf_block_put_ext);
1130
1131 void tcf_block_put(struct tcf_block *block)
1132 {
1133         struct tcf_block_ext_info ei = {0, };
1134
1135         if (!block)
1136                 return;
1137         tcf_block_put_ext(block, block->q, &ei);
1138 }
1139
1140 EXPORT_SYMBOL(tcf_block_put);
1141
/* A callback registered on a block's cb_list. Entries are matched by the
 * (cb, cb_ident) pair in tcf_block_cb_lookup().
 */
struct tcf_block_cb {
        /* Linkage on block->cb_list. */
        struct list_head list;
        /* Callback function. */
        tc_setup_cb_t *cb;
        /* Identity used together with cb to look the entry up. */
        void *cb_ident;
        /* Private pointer passed to cb; returned by tcf_block_cb_priv(). */
        void *cb_priv;
        /* Plain counter changed by tcf_block_cb_incref()/_decref(); the
         * accessors do no locking or atomics of their own.
         */
        unsigned int refcnt;
};
1149
1150 void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
1151 {
1152         return block_cb->cb_priv;
1153 }
1154 EXPORT_SYMBOL(tcf_block_cb_priv);
1155
1156 struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
1157                                          tc_setup_cb_t *cb, void *cb_ident)
1158 {       struct tcf_block_cb *block_cb;
1159
1160         list_for_each_entry(block_cb, &block->cb_list, list)
1161                 if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
1162                         return block_cb;
1163         return NULL;
1164 }
1165 EXPORT_SYMBOL(tcf_block_cb_lookup);
1166
1167 void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
1168 {
1169         block_cb->refcnt++;
1170 }
1171 EXPORT_SYMBOL(tcf_block_cb_incref);
1172
1173 unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
1174 {
1175         return --block_cb->refcnt;
1176 }
1177 EXPORT_SYMBOL(tcf_block_cb_decref);
1178
1179 static int
1180 tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
1181                             void *cb_priv, bool add, bool offload_in_use,
1182                             struct netlink_ext_ack *extack)
1183 {
1184         struct tcf_chain *chain;
1185         struct tcf_proto *tp;
1186         int err;
1187
1188         list_for_each_entry(chain, &block->chain_list, list) {
1189                 for (tp = rtnl_dereference(chain->filter_chain); tp;
1190                      tp = rtnl_dereference(tp->next)) {
1191                         if (tp->ops->reoffload) {
1192                                 err = tp->ops->reoffload(tp, add, cb, cb_priv,
1193                                                          extack);
1194                                 if (err && add)
1195                                         goto err_playback_remove;
1196                         } else if (add && offload_in_use) {
1197                                 err = -EOPNOTSUPP;
1198                                 NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
1199                                 goto err_playback_remove;
1200                         }
1201                 }
1202         }
1203
1204         return 0;
1205
1206 err_playback_remove:
1207         tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
1208                                     extack);
1209         return err;
1210 }
1211
1212 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
1213                                              tc_setup_cb_t *cb, void *cb_ident,
1214                                              void *cb_priv,
1215                                              struct netlink_ext_ack *extack)
1216 {
1217         struct tcf_block_cb *block_cb;
1218         int err;
1219
1220         /* Replay any already present rules */
1221         err = tcf_block_playback_offloads(block, cb, cb_priv, true,
1222                                           tcf_block_offload_in_use(block),
1223                                           extack);
1224         if (err)
1225                 return ERR_PTR(err);
1226
1227         block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
1228         if (!block_cb)
1229                 return ERR_PTR(-ENOMEM);
1230         block_cb->cb = cb;
1231         block_cb->cb_ident = cb_ident;
1232         block_cb->cb_priv = cb_priv;
1233         list_add(&block_cb->list, &block->cb_list);
1234         return block_cb;
1235 }
1236 EXPORT_SYMBOL(__tcf_block_cb_register);
1237
1238 int tcf_block_cb_register(struct tcf_block *block,
1239                           tc_setup_cb_t *cb, void *cb_ident,
1240                           void *cb_priv, struct netlink_ext_ack *extack)
1241 {
1242         struct tcf_block_cb *block_cb;
1243
1244         block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
1245                                            extack);
1246         return PTR_ERR_OR_ZERO(block_cb);
1247 }
1248 EXPORT_SYMBOL(tcf_block_cb_register);
1249
1250 void __tcf_block_cb_unregister(struct tcf_block *block,
1251                                struct tcf_block_cb *block_cb)
1252 {
1253         tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
1254                                     false, tcf_block_offload_in_use(block),
1255                                     NULL);
1256         list_del(&block_cb->list);
1257         kfree(block_cb);
1258 }
1259 EXPORT_SYMBOL(__tcf_block_cb_unregister);
1260
1261 void tcf_block_cb_unregister(struct tcf_block *block,
1262                              tc_setup_cb_t *cb, void *cb_ident)
1263 {
1264         struct tcf_block_cb *block_cb;
1265
1266         block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
1267         if (!block_cb)
1268                 return;
1269         __tcf_block_cb_unregister(block, block_cb);
1270 }
1271 EXPORT_SYMBOL(tcf_block_cb_unregister);
1272
/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 *
 * Walks the tp list starting at tp, skipping entries whose protocol matches
 * neither the skb protocol nor ETH_P_ALL, and returns the first
 * non-negative verdict. Returns TC_ACT_UNSPEC when the list is exhausted.
 *
 * With CONFIG_NET_CLS_ACT, a TC_ACT_RECLASSIFY verdict (unless compat_mode)
 * restarts the walk from the original tp, and a goto-chain verdict restarts
 * it from res->goto_tp. Once the restart counter has reached
 * max_reclassify_loop, a further restart request is logged and
 * TC_ACT_SHOT is returned.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                 struct tcf_result *res, bool compat_mode)
{
        __be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
        const int max_reclassify_loop = 4;
        const struct tcf_proto *orig_tp = tp;
        const struct tcf_proto *first_tp;
        int limit = 0;

reclassify:
#endif
        for (; tp; tp = rcu_dereference_bh(tp->next)) {
                int err;

                if (tp->protocol != protocol &&
                    tp->protocol != htons(ETH_P_ALL))
                        continue;

                err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
                if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
                        first_tp = orig_tp;
                        goto reset;
                } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
                        first_tp = res->goto_tp;
                        goto reset;
                }
#endif
                /* A negative result means "no verdict here": try the next
                 * tp.
                 */
                if (err >= 0)
                        return err;
        }

        return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
        /* tp is non-NULL here: reset is only reached from inside the loop. */
        if (unlikely(limit++ >= max_reclassify_loop)) {
                net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
                                       tp->chain->block->index,
                                       tp->prio & 0xffff,
                                       ntohs(tp->protocol));
                return TC_ACT_SHOT;
        }

        tp = first_tp;
        /* Re-read the protocol before the walk restarts. */
        protocol = tc_skb_protocol(skb);
        goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);
1327
/* Position inside a chain's tp list, as filled in by tcf_chain_tp_find(). */
struct tcf_chain_info {
        /* Address of the link (chain head or a tp's next pointer) where the
         * search stopped.
         */
        struct tcf_proto __rcu **pprev;
        /* Successor of the tp that was found; NULL when none was found. */
        struct tcf_proto __rcu *next;
};
1332
1333 static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
1334 {
1335         return rtnl_dereference(*chain_info->pprev);
1336 }
1337
/* Link tp into the chain at the position described by chain_info (as filled
 * in by tcf_chain_tp_find()) and take a reference on the chain.
 */
static void tcf_chain_tp_insert(struct tcf_chain *chain,
                                struct tcf_chain_info *chain_info,
                                struct tcf_proto *tp)
{
        /* The insertion link holds the same pointer as the chain head, so tp
         * becomes the new head: report the head change first.
         */
        if (*chain_info->pprev == chain->filter_chain)
                tcf_chain0_head_change(chain, tp);
        /* Set up tp->next before publishing tp, so a reader that finds tp
         * also finds a valid successor.
         */
        RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
        rcu_assign_pointer(*chain_info->pprev, tp);
        tcf_chain_hold(chain);
}
1348
/* Unlink tp from the chain using the position in chain_info (as filled in by
 * tcf_chain_tp_find()) and drop a chain reference. tp itself is not
 * destroyed here.
 */
static void tcf_chain_tp_remove(struct tcf_chain *chain,
                                struct tcf_chain_info *chain_info,
                                struct tcf_proto *tp)
{
        struct tcf_proto *next = rtnl_dereference(chain_info->next);

        /* Removing the head: report the successor as the new head. */
        if (tp == chain->filter_chain)
                tcf_chain0_head_change(chain, next);
        /* Make the predecessor link skip over tp. */
        RCU_INIT_POINTER(*chain_info->pprev, next);
        tcf_chain_put(chain);
}
1360
1361 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1362                                            struct tcf_chain_info *chain_info,
1363                                            u32 protocol, u32 prio,
1364                                            bool prio_allocate)
1365 {
1366         struct tcf_proto **pprev;
1367         struct tcf_proto *tp;
1368
1369         /* Check the chain for existence of proto-tcf with this priority */
1370         for (pprev = &chain->filter_chain;
1371              (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
1372                 if (tp->prio >= prio) {
1373                         if (tp->prio == prio) {
1374                                 if (prio_allocate ||
1375                                     (tp->protocol != protocol && protocol))
1376                                         return ERR_PTR(-EINVAL);
1377                         } else {
1378                                 tp = NULL;
1379                         }
1380                         break;
1381                 }
1382         }
1383         chain_info->pprev = pprev;
1384         chain_info->next = tp ? tp->next : NULL;
1385         return tp;
1386 }
1387
1388 static int tcf_fill_node(struct net *net, struct sk_buff *skb,
1389                          struct tcf_proto *tp, struct tcf_block *block,
1390                          struct Qdisc *q, u32 parent, void *fh,
1391                          u32 portid, u32 seq, u16 flags, int event)
1392 {
1393         struct tcmsg *tcm;
1394         struct nlmsghdr  *nlh;
1395         unsigned char *b = skb_tail_pointer(skb);
1396
1397         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1398         if (!nlh)
1399                 goto out_nlmsg_trim;
1400         tcm = nlmsg_data(nlh);
1401         tcm->tcm_family = AF_UNSPEC;
1402         tcm->tcm__pad1 = 0;
1403         tcm->tcm__pad2 = 0;
1404         if (q) {
1405                 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1406                 tcm->tcm_parent = parent;
1407         } else {
1408                 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
1409                 tcm->tcm_block_index = block->index;
1410         }
1411         tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
1412         if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
1413                 goto nla_put_failure;
1414         if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
1415                 goto nla_put_failure;
1416         if (!fh) {
1417                 tcm->tcm_handle = 0;
1418         } else {
1419                 if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
1420                         goto nla_put_failure;
1421         }
1422         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1423         return skb->len;
1424
1425 out_nlmsg_trim:
1426 nla_put_failure:
1427         nlmsg_trim(skb, b);
1428         return -1;
1429 }
1430
1431 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
1432                           struct nlmsghdr *n, struct tcf_proto *tp,
1433                           struct tcf_block *block, struct Qdisc *q,
1434                           u32 parent, void *fh, int event, bool unicast)
1435 {
1436         struct sk_buff *skb;
1437         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1438
1439         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1440         if (!skb)
1441                 return -ENOBUFS;
1442
1443         if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1444                           n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
1445                 kfree_skb(skb);
1446                 return -EINVAL;
1447         }
1448
1449         if (unicast)
1450                 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1451
1452         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1453                               n->nlmsg_flags & NLM_F_ECHO);
1454 }
1455
1456 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
1457                               struct nlmsghdr *n, struct tcf_proto *tp,
1458                               struct tcf_block *block, struct Qdisc *q,
1459                               u32 parent, void *fh, bool unicast, bool *last,
1460                               struct netlink_ext_ack *extack)
1461 {
1462         struct sk_buff *skb;
1463         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1464         int err;
1465
1466         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1467         if (!skb)
1468                 return -ENOBUFS;
1469
1470         if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1471                           n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
1472                 NL_SET_ERR_MSG(extack, "Failed to build del event notification");
1473                 kfree_skb(skb);
1474                 return -EINVAL;
1475         }
1476
1477         err = tp->ops->delete(tp, fh, last, extack);
1478         if (err) {
1479                 kfree_skb(skb);
1480                 return err;
1481         }
1482
1483         if (unicast)
1484                 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1485
1486         err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1487                              n->nlmsg_flags & NLM_F_ECHO);
1488         if (err < 0)
1489                 NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
1490         return err;
1491 }
1492
1493 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
1494                                  struct tcf_block *block, struct Qdisc *q,
1495                                  u32 parent, struct nlmsghdr *n,
1496                                  struct tcf_chain *chain, int event)
1497 {
1498         struct tcf_proto *tp;
1499
1500         for (tp = rtnl_dereference(chain->filter_chain);
1501              tp; tp = rtnl_dereference(tp->next))
1502                 tfilter_notify(net, oskb, n, tp, block,
1503                                q, parent, NULL, event, false);
1504 }
1505
1506 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1507                           struct netlink_ext_ack *extack)
1508 {
1509         struct net *net = sock_net(skb->sk);
1510         struct nlattr *tca[TCA_MAX + 1];
1511         struct tcmsg *t;
1512         u32 protocol;
1513         u32 prio;
1514         bool prio_allocate;
1515         u32 parent;
1516         u32 chain_index;
1517         struct Qdisc *q = NULL;
1518         struct tcf_chain_info chain_info;
1519         struct tcf_chain *chain = NULL;
1520         struct tcf_block *block;
1521         struct tcf_proto *tp;
1522         unsigned long cl;
1523         void *fh;
1524         int err;
1525         int tp_created;
1526
1527         if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1528                 return -EPERM;
1529
1530 replay:
1531         tp_created = 0;
1532
1533         err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
1534         if (err < 0)
1535                 return err;
1536
1537         t = nlmsg_data(n);
1538         protocol = TC_H_MIN(t->tcm_info);
1539         prio = TC_H_MAJ(t->tcm_info);
1540         prio_allocate = false;
1541         parent = t->tcm_parent;
1542         cl = 0;
1543
1544         if (prio == 0) {
1545                 /* If no priority is provided by the user,
1546                  * we allocate one.
1547                  */
1548                 if (n->nlmsg_flags & NLM_F_CREATE) {
1549                         prio = TC_H_MAKE(0x80000000U, 0U);
1550                         prio_allocate = true;
1551                 } else {
1552                         NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
1553                         return -ENOENT;
1554                 }
1555         }
1556
1557         /* Find head of filter chain. */
1558
1559         block = tcf_block_find(net, &q, &parent, &cl,
1560                                t->tcm_ifindex, t->tcm_block_index, extack);
1561         if (IS_ERR(block)) {
1562                 err = PTR_ERR(block);
1563                 goto errout;
1564         }
1565
1566         chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
1567         if (chain_index > TC_ACT_EXT_VAL_MASK) {
1568                 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
1569                 err = -EINVAL;
1570                 goto errout;
1571         }
1572         chain = tcf_chain_get(block, chain_index, true);
1573         if (!chain) {
1574                 NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
1575                 err = -ENOMEM;
1576                 goto errout;
1577         }
1578
1579         tp = tcf_chain_tp_find(chain, &chain_info, protocol,
1580                                prio, prio_allocate);
1581         if (IS_ERR(tp)) {
1582                 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
1583                 err = PTR_ERR(tp);
1584                 goto errout;
1585         }
1586
1587         if (tp == NULL) {
1588                 /* Proto-tcf does not exist, create new one */
1589
1590                 if (tca[TCA_KIND] == NULL || !protocol) {
1591                         NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
1592                         err = -EINVAL;
1593                         goto errout;
1594                 }
1595
1596                 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1597                         NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
1598                         err = -ENOENT;
1599                         goto errout;
1600                 }
1601
1602                 if (prio_allocate)
1603                         prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
1604
1605                 tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
1606                                       protocol, prio, chain, extack);
1607                 if (IS_ERR(tp)) {
1608                         err = PTR_ERR(tp);
1609                         goto errout;
1610                 }
1611                 tp_created = 1;
1612         } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
1613                 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
1614                 err = -EINVAL;
1615                 goto errout;
1616         }
1617
1618         fh = tp->ops->get(tp, t->tcm_handle);
1619
1620         if (!fh) {
1621                 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1622                         NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
1623                         err = -ENOENT;
1624                         goto errout;
1625                 }
1626         } else if (n->nlmsg_flags & NLM_F_EXCL) {
1627                 NL_SET_ERR_MSG(extack, "Filter already exists");
1628                 err = -EEXIST;
1629                 goto errout;
1630         }
1631
1632         if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
1633                 NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
1634                 err = -EINVAL;
1635                 goto errout;
1636         }
1637
1638         err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
1639                               n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
1640                               extack);
1641         if (err == 0) {
1642                 if (tp_created)
1643                         tcf_chain_tp_insert(chain, &chain_info, tp);
1644                 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
1645                                RTM_NEWTFILTER, false);
1646         } else {
1647                 if (tp_created)
1648                         tcf_proto_destroy(tp, NULL);
1649         }
1650
1651 errout:
1652         if (chain)
1653                 tcf_chain_put(chain);
1654         tcf_block_release(q, block);
1655         if (err == -EAGAIN)
1656                 /* Replay the request. */
1657                 goto replay;
1658         return err;
1659 }
1660
1661 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1662                           struct netlink_ext_ack *extack)
1663 {
1664         struct net *net = sock_net(skb->sk);
1665         struct nlattr *tca[TCA_MAX + 1];
1666         struct tcmsg *t;
1667         u32 protocol;
1668         u32 prio;
1669         u32 parent;
1670         u32 chain_index;
1671         struct Qdisc *q = NULL;
1672         struct tcf_chain_info chain_info;
1673         struct tcf_chain *chain = NULL;
1674         struct tcf_block *block;
1675         struct tcf_proto *tp = NULL;
1676         unsigned long cl = 0;
1677         void *fh = NULL;
1678         int err;
1679
1680         if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1681                 return -EPERM;
1682
1683         err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
1684         if (err < 0)
1685                 return err;
1686
1687         t = nlmsg_data(n);
1688         protocol = TC_H_MIN(t->tcm_info);
1689         prio = TC_H_MAJ(t->tcm_info);
1690         parent = t->tcm_parent;
1691
1692         if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
1693                 NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
1694                 return -ENOENT;
1695         }
1696
1697         /* Find head of filter chain. */
1698
1699         block = tcf_block_find(net, &q, &parent, &cl,
1700                                t->tcm_ifindex, t->tcm_block_index, extack);
1701         if (IS_ERR(block)) {
1702                 err = PTR_ERR(block);
1703                 goto errout;
1704         }
1705
1706         chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
1707         if (chain_index > TC_ACT_EXT_VAL_MASK) {
1708                 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
1709                 err = -EINVAL;
1710                 goto errout;
1711         }
1712         chain = tcf_chain_get(block, chain_index, false);
1713         if (!chain) {
1714                 /* User requested flush on non-existent chain. Nothing to do,
1715                  * so just return success.
1716                  */
1717                 if (prio == 0) {
1718                         err = 0;
1719                         goto errout;
1720                 }
1721                 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
1722                 err = -ENOENT;
1723                 goto errout;
1724         }
1725
1726         if (prio == 0) {
1727                 tfilter_notify_chain(net, skb, block, q, parent, n,
1728                                      chain, RTM_DELTFILTER);
1729                 tcf_chain_flush(chain);
1730                 err = 0;
1731                 goto errout;
1732         }
1733
1734         tp = tcf_chain_tp_find(chain, &chain_info, protocol,
1735                                prio, false);
1736         if (!tp || IS_ERR(tp)) {
1737                 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
1738                 err = tp ? PTR_ERR(tp) : -ENOENT;
1739                 goto errout;
1740         } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
1741                 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
1742                 err = -EINVAL;
1743                 goto errout;
1744         }
1745
1746         fh = tp->ops->get(tp, t->tcm_handle);
1747
1748         if (!fh) {
1749                 if (t->tcm_handle == 0) {
1750                         tcf_chain_tp_remove(chain, &chain_info, tp);
1751                         tfilter_notify(net, skb, n, tp, block, q, parent, fh,
1752                                        RTM_DELTFILTER, false);
1753                         tcf_proto_destroy(tp, extack);
1754                         err = 0;
1755                 } else {
1756                         NL_SET_ERR_MSG(extack, "Specified filter handle not found");
1757                         err = -ENOENT;
1758                 }
1759         } else {
1760                 bool last;
1761
1762                 err = tfilter_del_notify(net, skb, n, tp, block,
1763                                          q, parent, fh, false, &last,
1764                                          extack);
1765                 if (err)
1766                         goto errout;
1767                 if (last) {
1768                         tcf_chain_tp_remove(chain, &chain_info, tp);
1769                         tcf_proto_destroy(tp, extack);
1770                 }
1771         }
1772
1773 errout:
1774         if (chain)
1775                 tcf_chain_put(chain);
1776         tcf_block_release(q, block);
1777         return err;
1778 }
1779
1780 static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1781                           struct netlink_ext_ack *extack)
1782 {
1783         struct net *net = sock_net(skb->sk);
1784         struct nlattr *tca[TCA_MAX + 1];
1785         struct tcmsg *t;
1786         u32 protocol;
1787         u32 prio;
1788         u32 parent;
1789         u32 chain_index;
1790         struct Qdisc *q = NULL;
1791         struct tcf_chain_info chain_info;
1792         struct tcf_chain *chain = NULL;
1793         struct tcf_block *block;
1794         struct tcf_proto *tp = NULL;
1795         unsigned long cl = 0;
1796         void *fh = NULL;
1797         int err;
1798
1799         err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
1800         if (err < 0)
1801                 return err;
1802
1803         t = nlmsg_data(n);
1804         protocol = TC_H_MIN(t->tcm_info);
1805         prio = TC_H_MAJ(t->tcm_info);
1806         parent = t->tcm_parent;
1807
1808         if (prio == 0) {
1809                 NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
1810                 return -ENOENT;
1811         }
1812
1813         /* Find head of filter chain. */
1814
1815         block = tcf_block_find(net, &q, &parent, &cl,
1816                                t->tcm_ifindex, t->tcm_block_index, extack);
1817         if (IS_ERR(block)) {
1818                 err = PTR_ERR(block);
1819                 goto errout;
1820         }
1821
1822         chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
1823         if (chain_index > TC_ACT_EXT_VAL_MASK) {
1824                 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
1825                 err = -EINVAL;
1826                 goto errout;
1827         }
1828         chain = tcf_chain_get(block, chain_index, false);
1829         if (!chain) {
1830                 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
1831                 err = -EINVAL;
1832                 goto errout;
1833         }
1834
1835         tp = tcf_chain_tp_find(chain, &chain_info, protocol,
1836                                prio, false);
1837         if (!tp || IS_ERR(tp)) {
1838                 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
1839                 err = tp ? PTR_ERR(tp) : -ENOENT;
1840                 goto errout;
1841         } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
1842                 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
1843                 err = -EINVAL;
1844                 goto errout;
1845         }
1846
1847         fh = tp->ops->get(tp, t->tcm_handle);
1848
1849         if (!fh) {
1850                 NL_SET_ERR_MSG(extack, "Specified filter handle not found");
1851                 err = -ENOENT;
1852         } else {
1853                 err = tfilter_notify(net, skb, n, tp, block, q, parent,
1854                                      fh, RTM_NEWTFILTER, true);
1855                 if (err < 0)
1856                         NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
1857         }
1858
1859 errout:
1860         if (chain)
1861                 tcf_chain_put(chain);
1862         tcf_block_release(q, block);
1863         return err;
1864 }
1865
/* Context for dumping the filters of one classifier instance.
 *
 * tcf_node_dump() receives a struct tcf_walker pointer and casts it back to
 * this structure, so "w" has to stay the first member.
 */
struct tcf_dump_args {
        /* Walker state; tcf_chain_dump() fills in fn/stop/skip/count/cookie. */
        struct tcf_walker w;
        /* Message buffer the filter entries are written into. */
        struct sk_buff *skb;
        /* Dump callback; supplies the portid and sequence number. */
        struct netlink_callback *cb;
        /* The three members below are forwarded unchanged to tcf_fill_node(). */
        struct tcf_block *block;
        struct Qdisc *q;
        u32 parent;
};
1874
1875 static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1876 {
1877         struct tcf_dump_args *a = (void *)arg;
1878         struct net *net = sock_net(a->skb->sk);
1879
1880         return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
1881                              n, NETLINK_CB(a->cb->skb).portid,
1882                              a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1883                              RTM_NEWTFILTER);
1884 }
1885
1886 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
1887                            struct sk_buff *skb, struct netlink_callback *cb,
1888                            long index_start, long *p_index)
1889 {
1890         struct net *net = sock_net(skb->sk);
1891         struct tcf_block *block = chain->block;
1892         struct tcmsg *tcm = nlmsg_data(cb->nlh);
1893         struct tcf_dump_args arg;
1894         struct tcf_proto *tp;
1895
1896         for (tp = rtnl_dereference(chain->filter_chain);
1897              tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
1898                 if (*p_index < index_start)
1899                         continue;
1900                 if (TC_H_MAJ(tcm->tcm_info) &&
1901                     TC_H_MAJ(tcm->tcm_info) != tp->prio)
1902                         continue;
1903                 if (TC_H_MIN(tcm->tcm_info) &&
1904                     TC_H_MIN(tcm->tcm_info) != tp->protocol)
1905                         continue;
1906                 if (*p_index > index_start)
1907                         memset(&cb->args[1], 0,
1908                                sizeof(cb->args) - sizeof(cb->args[0]));
1909                 if (cb->args[1] == 0) {
1910                         if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
1911                                           NETLINK_CB(cb->skb).portid,
1912                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
1913                                           RTM_NEWTFILTER) <= 0)
1914                                 return false;
1915
1916                         cb->args[1] = 1;
1917                 }
1918                 if (!tp->ops->walk)
1919                         continue;
1920                 arg.w.fn = tcf_node_dump;
1921                 arg.skb = skb;
1922                 arg.cb = cb;
1923                 arg.block = block;
1924                 arg.q = q;
1925                 arg.parent = parent;
1926                 arg.w.stop = 0;
1927                 arg.w.skip = cb->args[1] - 1;
1928                 arg.w.count = 0;
1929                 arg.w.cookie = cb->args[2];
1930                 tp->ops->walk(tp, &arg.w);
1931                 cb->args[2] = arg.w.cookie;
1932                 cb->args[1] = arg.w.count + 1;
1933                 if (arg.w.stop)
1934                         return false;
1935         }
1936         return true;
1937 }
1938
1939 /* called with RTNL */
1940 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
1941 {
1942         struct net *net = sock_net(skb->sk);
1943         struct nlattr *tca[TCA_MAX + 1];
1944         struct Qdisc *q = NULL;
1945         struct tcf_block *block;
1946         struct tcf_chain *chain;
1947         struct tcmsg *tcm = nlmsg_data(cb->nlh);
1948         long index_start;
1949         long index;
1950         u32 parent;
1951         int err;
1952
1953         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1954                 return skb->len;
1955
1956         err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL,
1957                           cb->extack);
1958         if (err)
1959                 return err;
1960
1961         if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
1962                 block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
1963                 if (!block)
1964                         goto out;
1965                 /* If we work with block index, q is NULL and parent value
1966                  * will never be used in the following code. The check
1967                  * in tcf_fill_node prevents it. However, compiler does not
1968                  * see that far, so set parent to zero to silence the warning
1969                  * about parent being uninitialized.
1970                  */
1971                 parent = 0;
1972         } else {
1973                 const struct Qdisc_class_ops *cops;
1974                 struct net_device *dev;
1975                 unsigned long cl = 0;
1976
1977                 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1978                 if (!dev)
1979                         return skb->len;
1980
1981                 parent = tcm->tcm_parent;
1982                 if (!parent) {
1983                         q = dev->qdisc;
1984                         parent = q->handle;
1985                 } else {
1986                         q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
1987                 }
1988                 if (!q)
1989                         goto out;
1990                 cops = q->ops->cl_ops;
1991                 if (!cops)
1992                         goto out;
1993                 if (!cops->tcf_block)
1994                         goto out;
1995                 if (TC_H_MIN(tcm->tcm_parent)) {
1996                         cl = cops->find(q, tcm->tcm_parent);
1997                         if (cl == 0)
1998                                 goto out;
1999                 }
2000                 block = cops->tcf_block(q, cl, NULL);
2001                 if (!block)
2002                         goto out;
2003                 if (tcf_block_shared(block))
2004                         q = NULL;
2005         }
2006
2007         index_start = cb->args[0];
2008         index = 0;
2009
2010         list_for_each_entry(chain, &block->chain_list, list) {
2011                 if (tca[TCA_CHAIN] &&
2012                     nla_get_u32(tca[TCA_CHAIN]) != chain->index)
2013                         continue;
2014                 if (!tcf_chain_dump(chain, q, parent, skb, cb,
2015                                     index_start, &index)) {
2016                         err = -EMSGSIZE;
2017                         break;
2018                 }
2019         }
2020
2021         if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2022                 tcf_block_refcnt_put(block);
2023         cb->args[0] = index;
2024
2025 out:
2026         /* If we did no progress, the error (EMSGSIZE) is real */
2027         if (skb->len == 0 && err)
2028                 return err;
2029         return skb->len;
2030 }
2031
2032 static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
2033                               struct sk_buff *skb, struct tcf_block *block,
2034                               u32 portid, u32 seq, u16 flags, int event)
2035 {
2036         unsigned char *b = skb_tail_pointer(skb);
2037         const struct tcf_proto_ops *ops;
2038         struct nlmsghdr *nlh;
2039         struct tcmsg *tcm;
2040         void *priv;
2041
2042         ops = chain->tmplt_ops;
2043         priv = chain->tmplt_priv;
2044
2045         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
2046         if (!nlh)
2047                 goto out_nlmsg_trim;
2048         tcm = nlmsg_data(nlh);
2049         tcm->tcm_family = AF_UNSPEC;
2050         tcm->tcm__pad1 = 0;
2051         tcm->tcm__pad2 = 0;
2052         tcm->tcm_handle = 0;
2053         if (block->q) {
2054                 tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
2055                 tcm->tcm_parent = block->q->handle;
2056         } else {
2057                 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
2058                 tcm->tcm_block_index = block->index;
2059         }
2060
2061         if (nla_put_u32(skb, TCA_CHAIN, chain->index))
2062                 goto nla_put_failure;
2063
2064         if (ops) {
2065                 if (nla_put_string(skb, TCA_KIND, ops->kind))
2066                         goto nla_put_failure;
2067                 if (ops->tmplt_dump(skb, net, priv) < 0)
2068                         goto nla_put_failure;
2069         }
2070
2071         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2072         return skb->len;
2073
2074 out_nlmsg_trim:
2075 nla_put_failure:
2076         nlmsg_trim(skb, b);
2077         return -EMSGSIZE;
2078 }
2079
2080 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
2081                            u32 seq, u16 flags, int event, bool unicast)
2082 {
2083         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2084         struct tcf_block *block = chain->block;
2085         struct net *net = block->net;
2086         struct sk_buff *skb;
2087
2088         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2089         if (!skb)
2090                 return -ENOBUFS;
2091
2092         if (tc_chain_fill_node(chain, net, skb, block, portid,
2093                                seq, flags, event) <= 0) {
2094                 kfree_skb(skb);
2095                 return -EINVAL;
2096         }
2097
2098         if (unicast)
2099                 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
2100
2101         return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
2102 }
2103
2104 static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
2105                               struct nlattr **tca,
2106                               struct netlink_ext_ack *extack)
2107 {
2108         const struct tcf_proto_ops *ops;
2109         void *tmplt_priv;
2110
2111         /* If kind is not set, user did not specify template. */
2112         if (!tca[TCA_KIND])
2113                 return 0;
2114
2115         ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
2116         if (IS_ERR(ops))
2117                 return PTR_ERR(ops);
2118         if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
2119                 NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
2120                 return -EOPNOTSUPP;
2121         }
2122
2123         tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
2124         if (IS_ERR(tmplt_priv)) {
2125                 module_put(ops->owner);
2126                 return PTR_ERR(tmplt_priv);
2127         }
2128         chain->tmplt_ops = ops;
2129         chain->tmplt_priv = tmplt_priv;
2130         return 0;
2131 }
2132
2133 static void tc_chain_tmplt_del(struct tcf_chain *chain)
2134 {
2135         const struct tcf_proto_ops *ops = chain->tmplt_ops;
2136
2137         /* If template ops are set, no work to do for us. */
2138         if (!ops)
2139                 return;
2140
2141         ops->tmplt_destroy(chain->tmplt_priv);
2142         module_put(ops->owner);
2143 }
2144
2145 /* Add/delete/get a chain */
2146
2147 static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
2148                         struct netlink_ext_ack *extack)
2149 {
2150         struct net *net = sock_net(skb->sk);
2151         struct nlattr *tca[TCA_MAX + 1];
2152         struct tcmsg *t;
2153         u32 parent;
2154         u32 chain_index;
2155         struct Qdisc *q = NULL;
2156         struct tcf_chain *chain = NULL;
2157         struct tcf_block *block;
2158         unsigned long cl;
2159         int err;
2160
2161         if (n->nlmsg_type != RTM_GETCHAIN &&
2162             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2163                 return -EPERM;
2164
2165 replay:
2166         err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
2167         if (err < 0)
2168                 return err;
2169
2170         t = nlmsg_data(n);
2171         parent = t->tcm_parent;
2172         cl = 0;
2173
2174         block = tcf_block_find(net, &q, &parent, &cl,
2175                                t->tcm_ifindex, t->tcm_block_index, extack);
2176         if (IS_ERR(block))
2177                 return PTR_ERR(block);
2178
2179         chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2180         if (chain_index > TC_ACT_EXT_VAL_MASK) {
2181                 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2182                 err = -EINVAL;
2183                 goto errout_block;
2184         }
2185         chain = tcf_chain_lookup(block, chain_index);
2186         if (n->nlmsg_type == RTM_NEWCHAIN) {
2187                 if (chain) {
2188                         if (tcf_chain_held_by_acts_only(chain)) {
2189                                 /* The chain exists only because there is
2190                                  * some action referencing it.
2191                                  */
2192                                 tcf_chain_hold(chain);
2193                         } else {
2194                                 NL_SET_ERR_MSG(extack, "Filter chain already exists");
2195                                 err = -EEXIST;
2196                                 goto errout_block;
2197                         }
2198                 } else {
2199                         if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2200                                 NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
2201                                 err = -ENOENT;
2202                                 goto errout_block;
2203                         }
2204                         chain = tcf_chain_create(block, chain_index);
2205                         if (!chain) {
2206                                 NL_SET_ERR_MSG(extack, "Failed to create filter chain");
2207                                 err = -ENOMEM;
2208                                 goto errout_block;
2209                         }
2210                 }
2211         } else {
2212                 if (!chain || tcf_chain_held_by_acts_only(chain)) {
2213                         NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2214                         err = -EINVAL;
2215                         goto errout_block;
2216                 }
2217                 tcf_chain_hold(chain);
2218         }
2219
2220         switch (n->nlmsg_type) {
2221         case RTM_NEWCHAIN:
2222                 err = tc_chain_tmplt_add(chain, net, tca, extack);
2223                 if (err)
2224                         goto errout;
2225                 /* In case the chain was successfully added, take a reference
2226                  * to the chain. This ensures that an empty chain
2227                  * does not disappear at the end of this function.
2228                  */
2229                 tcf_chain_hold(chain);
2230                 chain->explicitly_created = true;
2231                 tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
2232                                 RTM_NEWCHAIN, false);
2233                 break;
2234         case RTM_DELCHAIN:
2235                 tfilter_notify_chain(net, skb, block, q, parent, n,
2236                                      chain, RTM_DELTFILTER);
2237                 /* Flush the chain first as the user requested chain removal. */
2238                 tcf_chain_flush(chain);
2239                 /* In case the chain was successfully deleted, put a reference
2240                  * to the chain previously taken during addition.
2241                  */
2242                 tcf_chain_put_explicitly_created(chain);
2243                 chain->explicitly_created = false;
2244                 break;
2245         case RTM_GETCHAIN:
2246                 err = tc_chain_notify(chain, skb, n->nlmsg_seq,
2247                                       n->nlmsg_seq, n->nlmsg_type, true);
2248                 if (err < 0)
2249                         NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
2250                 break;
2251         default:
2252                 err = -EOPNOTSUPP;
2253                 NL_SET_ERR_MSG(extack, "Unsupported message type");
2254                 goto errout;
2255         }
2256
2257 errout:
2258         tcf_chain_put(chain);
2259 errout_block:
2260         tcf_block_release(q, block);
2261         if (err == -EAGAIN)
2262                 /* Replay the request. */
2263                 goto replay;
2264         return err;
2265 }
2266
2267 /* called with RTNL */
2268 static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
2269 {
2270         struct net *net = sock_net(skb->sk);
2271         struct nlattr *tca[TCA_MAX + 1];
2272         struct Qdisc *q = NULL;
2273         struct tcf_block *block;
2274         struct tcf_chain *chain;
2275         struct tcmsg *tcm = nlmsg_data(cb->nlh);
2276         long index_start;
2277         long index;
2278         u32 parent;
2279         int err;
2280
2281         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2282                 return skb->len;
2283
2284         err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
2285                           cb->extack);
2286         if (err)
2287                 return err;
2288
2289         if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2290                 block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2291                 if (!block)
2292                         goto out;
2293                 /* If we work with block index, q is NULL and parent value
2294                  * will never be used in the following code. The check
2295                  * in tcf_fill_node prevents it. However, compiler does not
2296                  * see that far, so set parent to zero to silence the warning
2297                  * about parent being uninitialized.
2298                  */
2299                 parent = 0;
2300         } else {
2301                 const struct Qdisc_class_ops *cops;
2302                 struct net_device *dev;
2303                 unsigned long cl = 0;
2304
2305                 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2306                 if (!dev)
2307                         return skb->len;
2308
2309                 parent = tcm->tcm_parent;
2310                 if (!parent) {
2311                         q = dev->qdisc;
2312                         parent = q->handle;
2313                 } else {
2314                         q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2315                 }
2316                 if (!q)
2317                         goto out;
2318                 cops = q->ops->cl_ops;
2319                 if (!cops)
2320                         goto out;
2321                 if (!cops->tcf_block)
2322                         goto out;
2323                 if (TC_H_MIN(tcm->tcm_parent)) {
2324                         cl = cops->find(q, tcm->tcm_parent);
2325                         if (cl == 0)
2326                                 goto out;
2327                 }
2328                 block = cops->tcf_block(q, cl, NULL);
2329                 if (!block)
2330                         goto out;
2331                 if (tcf_block_shared(block))
2332                         q = NULL;
2333         }
2334
2335         index_start = cb->args[0];
2336         index = 0;
2337
2338         list_for_each_entry(chain, &block->chain_list, list) {
2339                 if ((tca[TCA_CHAIN] &&
2340                      nla_get_u32(tca[TCA_CHAIN]) != chain->index))
2341                         continue;
2342                 if (index < index_start) {
2343                         index++;
2344                         continue;
2345                 }
2346                 if (tcf_chain_held_by_acts_only(chain))
2347                         continue;
2348                 err = tc_chain_fill_node(chain, net, skb, block,
2349                                          NETLINK_CB(cb->skb).portid,
2350                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
2351                                          RTM_NEWCHAIN);
2352                 if (err <= 0)
2353                         break;
2354                 index++;
2355         }
2356
2357         if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2358                 tcf_block_refcnt_put(block);
2359         cb->args[0] = index;
2360
2361 out:
2362         /* If we did no progress, the error (EMSGSIZE) is real */
2363         if (skb->len == 0 && err)
2364                 return err;
2365         return skb->len;
2366 }
2367
/* Release the actions bound to @exts and free the action array.
 *
 * Does nothing when CONFIG_NET_CLS_ACT is disabled. After the call
 * nr_actions is zero.
 */
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
        /* Guard against an extension set whose action array was never
         * allocated, so that a NULL array is not handed to
         * tcf_action_destroy().
         */
        if (exts->actions) {
                tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
                kfree(exts->actions);
        }
        exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
2377
2378 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
2379                       struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
2380                       struct netlink_ext_ack *extack)
2381 {
2382 #ifdef CONFIG_NET_CLS_ACT
2383         {
2384                 struct tc_action *act;
2385                 size_t attr_size = 0;
2386
2387                 if (exts->police && tb[exts->police]) {
2388                         act = tcf_action_init_1(net, tp, tb[exts->police],
2389                                                 rate_tlv, "police", ovr,
2390                                                 TCA_ACT_BIND, true, extack);
2391                         if (IS_ERR(act))
2392                                 return PTR_ERR(act);
2393
2394                         act->type = exts->type = TCA_OLD_COMPAT;
2395                         exts->actions[0] = act;
2396                         exts->nr_actions = 1;
2397                 } else if (exts->action && tb[exts->action]) {
2398                         int err;
2399
2400                         err = tcf_action_init(net, tp, tb[exts->action],
2401                                               rate_tlv, NULL, ovr, TCA_ACT_BIND,
2402                                               exts->actions, &attr_size, true,
2403                                               extack);
2404                         if (err < 0)
2405                                 return err;
2406                         exts->nr_actions = err;
2407                 }
2408                 exts->net = net;
2409         }
2410 #else
2411         if ((exts->action && tb[exts->action]) ||
2412             (exts->police && tb[exts->police])) {
2413                 NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
2414                 return -EOPNOTSUPP;
2415         }
2416 #endif
2417
2418         return 0;
2419 }
2420 EXPORT_SYMBOL(tcf_exts_validate);
2421
/* Replace the contents of @dst with those of @src and destroy what @dst
 * held before. Does nothing when CONFIG_NET_CLS_ACT is disabled.
 */
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
        struct tcf_exts previous;

        /* Keep a copy of the old contents so they can be released after
         * @dst has been overwritten.
         */
        previous = *dst;
        *dst = *src;
        tcf_exts_destroy(&previous);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
2432
#ifdef CONFIG_NET_CLS_ACT
/* Return the first action of @exts, or NULL when it has no actions. */
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
        return exts->nr_actions ? exts->actions[0] : NULL;
}
#endif
2442
/* Dump the actions of @exts into @skb as a nested attribute.
 *
 * Nothing is written when the classifier has no action attribute or @exts
 * holds no actions. Extensions not marked TCA_OLD_COMPAT are dumped under
 * the action attribute; old-compat ones are dumped under the police
 * attribute, if the classifier has one.
 *
 * Returns 0 on success (always when CONFIG_NET_CLS_ACT is disabled) and -1
 * after cancelling the nest when something could not be added.
 */
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
        struct nlattr *nest;

        if (!exts->action || !tcf_exts_has_actions(exts))
                return 0;

        /*
         * again for backward compatible mode - we want
         * to work with both old and new modes of entering
         * tc data even if iproute2  was newer - jhs
         */
        if (exts->type != TCA_OLD_COMPAT) {
                nest = nla_nest_start(skb, exts->action);
                if (!nest)
                        goto nla_put_failure;

                if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
                        goto nla_put_failure;
                nla_nest_end(skb, nest);
        } else if (exts->police) {
                struct tc_action *act = tcf_exts_first_act(exts);

                nest = nla_nest_start(skb, exts->police);
                if (!nest || !act)
                        goto nla_put_failure;
                if (tcf_action_dump_old(skb, act, 0, 0) < 0)
                        goto nla_put_failure;
                nla_nest_end(skb, nest);
        }

        return 0;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
#else
        return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);
2482
2483
/* Copy the statistics of the first action of @exts into @skb.
 *
 * Returns -1 when tcf_action_copy_stats() fails, 0 otherwise, including
 * when there is no action or CONFIG_NET_CLS_ACT is disabled.
 */
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
        struct tc_action *first = tcf_exts_first_act(exts);

        if (!first)
                return 0;
        if (tcf_action_copy_stats(skb, first, 1) < 0)
                return -1;
#endif
        return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
2494
2495 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
2496                      void *type_data, bool err_stop)
2497 {
2498         struct tcf_block_cb *block_cb;
2499         int ok_count = 0;
2500         int err;
2501
2502         /* Make sure all netdevs sharing this block are offload-capable. */
2503         if (block->nooffloaddevcnt && err_stop)
2504                 return -EOPNOTSUPP;
2505
2506         list_for_each_entry(block_cb, &block->cb_list, list) {
2507                 err = block_cb->cb(type, type_data, block_cb->cb_priv);
2508                 if (err) {
2509                         if (err_stop)
2510                                 return err;
2511                 } else {
2512                         ok_count++;
2513                 }
2514         }
2515         return ok_count;
2516 }
2517 EXPORT_SYMBOL(tc_setup_cb_call);
2518
2519 static __net_init int tcf_net_init(struct net *net)
2520 {
2521         struct tcf_net *tn = net_generic(net, tcf_net_id);
2522
2523         spin_lock_init(&tn->idr_lock);
2524         idr_init(&tn->idr);
2525         return 0;
2526 }
2527
2528 static void __net_exit tcf_net_exit(struct net *net)
2529 {
2530         struct tcf_net *tn = net_generic(net, tcf_net_id);
2531
2532         idr_destroy(&tn->idr);
2533 }
2534
/* Per-network-namespace hooks of the classifier API, registered from
 * tc_filter_init(). The .id/.size pair describes the struct tcf_net that
 * tcf_net_init() and tcf_net_exit() fetch with net_generic().
 */
static struct pernet_operations tcf_net_ops = {
        .init = tcf_net_init,
        .exit = tcf_net_exit,
        .id   = &tcf_net_id,
        .size = sizeof(struct tcf_net),
};
2541
2542 static int __init tc_filter_init(void)
2543 {
2544         int err;
2545
2546         tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
2547         if (!tc_filter_wq)
2548                 return -ENOMEM;
2549
2550         err = register_pernet_subsys(&tcf_net_ops);
2551         if (err)
2552                 goto err_register_pernet_subsys;
2553
2554         err = rhashtable_init(&indr_setup_block_ht,
2555                               &tc_indr_setup_block_ht_params);
2556         if (err)
2557                 goto err_rhash_setup_block_ht;
2558
2559         rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
2560         rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
2561         rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
2562                       tc_dump_tfilter, 0);
2563         rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
2564         rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
2565         rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
2566                       tc_dump_chain, 0);
2567
2568         return 0;
2569
2570 err_rhash_setup_block_ht:
2571         unregister_pernet_subsys(&tcf_net_ops);
2572 err_register_pernet_subsys:
2573         destroy_workqueue(tc_filter_wq);
2574         return err;
2575 }
2576
2577 subsys_initcall(tc_filter_init);