Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/string.h>
16 #include <linux/errno.h>
17 #include <linux/if_arp.h>
18 #include <linux/netdevice.h>
19 #include <linux/init.h>
20 #include <linux/skbuff.h>
21 #include <linux/moduleparam.h>
22 #include <net/dst.h>
23 #include <net/neighbour.h>
24 #include <net/pkt_sched.h>
25
26 /*
27    How to set it up.
28    ----------------
29
30    After loading this module you will find a new device teqlN
31    and new qdisc with the same name. To join a slave to the equalizer
32    you should just set this qdisc on a device f.e.
33
34    # tc qdisc add dev eth0 root teql0
35    # tc qdisc add dev eth1 root teql0
36
37    That's all. Full PnP 8)
38
39    Applicability.
40    --------------
41
42    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
43       signal and generate EOI events. If you want to equalize virtual devices
44       like tunnels, use a normal eql device.
45    2. This device puts no limitations on physical slave characteristics
46       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
47       Certainly, large difference in link speeds will make the resulting
48       equalized link unusable, because of huge packet reordering.
49       I estimate an upper useful difference as ~10 times.
50    3. If the slave requires address resolution, only protocols using
51       neighbour cache (IPv4/IPv6) will work over the equalized link.
52       Other protocols are still allowed to use the slave device directly,
53       which will not break load balancing, though native slave
54       traffic will have the highest priority.  */
55
56 struct teql_master {
57         struct Qdisc_ops qops;
58         struct net_device *dev;
59         struct Qdisc *slaves;
60         struct list_head master_list;
61         unsigned long   tx_bytes;
62         unsigned long   tx_packets;
63         unsigned long   tx_errors;
64         unsigned long   tx_dropped;
65 };
66
67 struct teql_sched_data {
68         struct Qdisc *next;
69         struct teql_master *m;
70         struct sk_buff_head q;
71 };
72
/*
 * Successor of slave qdisc @q in the circular slave list.  Expands to the
 * struct member itself, so it can be read and assigned.
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Device flags that the master takes over from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
76
77 /* "teql*" qdisc routines */
78
79 static int
80 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
81 {
82         struct net_device *dev = qdisc_dev(sch);
83         struct teql_sched_data *q = qdisc_priv(sch);
84
85         if (q->q.qlen < dev->tx_queue_len) {
86                 __skb_queue_tail(&q->q, skb);
87                 return NET_XMIT_SUCCESS;
88         }
89
90         return qdisc_drop(skb, sch, to_free);
91 }
92
93 static struct sk_buff *
94 teql_dequeue(struct Qdisc *sch)
95 {
96         struct teql_sched_data *dat = qdisc_priv(sch);
97         struct netdev_queue *dat_queue;
98         struct sk_buff *skb;
99         struct Qdisc *q;
100
101         skb = __skb_dequeue(&dat->q);
102         dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
103         q = rcu_dereference_bh(dat_queue->qdisc);
104
105         if (skb == NULL) {
106                 struct net_device *m = qdisc_dev(q);
107                 if (m) {
108                         dat->m->slaves = sch;
109                         netif_wake_queue(m);
110                 }
111         } else {
112                 qdisc_bstats_update(sch, skb);
113         }
114         sch->q.qlen = dat->q.qlen + q->q.qlen;
115         return skb;
116 }
117
118 static struct sk_buff *
119 teql_peek(struct Qdisc *sch)
120 {
121         /* teql is meant to be used as root qdisc */
122         return NULL;
123 }
124
125 static void
126 teql_reset(struct Qdisc *sch)
127 {
128         struct teql_sched_data *dat = qdisc_priv(sch);
129
130         skb_queue_purge(&dat->q);
131         sch->q.qlen = 0;
132 }
133
134 static void
135 teql_destroy(struct Qdisc *sch)
136 {
137         struct Qdisc *q, *prev;
138         struct teql_sched_data *dat = qdisc_priv(sch);
139         struct teql_master *master = dat->m;
140
141         prev = master->slaves;
142         if (prev) {
143                 do {
144                         q = NEXT_SLAVE(prev);
145                         if (q == sch) {
146                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
147                                 if (q == master->slaves) {
148                                         master->slaves = NEXT_SLAVE(q);
149                                         if (q == master->slaves) {
150                                                 struct netdev_queue *txq;
151                                                 spinlock_t *root_lock;
152
153                                                 txq = netdev_get_tx_queue(master->dev, 0);
154                                                 master->slaves = NULL;
155
156                                                 root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
157                                                 spin_lock_bh(root_lock);
158                                                 qdisc_reset(rtnl_dereference(txq->qdisc));
159                                                 spin_unlock_bh(root_lock);
160                                         }
161                                 }
162                                 skb_queue_purge(&dat->q);
163                                 break;
164                         }
165
166                 } while ((prev = q) != master->slaves);
167         }
168 }
169
170 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
171                            struct netlink_ext_ack *extack)
172 {
173         struct net_device *dev = qdisc_dev(sch);
174         struct teql_master *m = (struct teql_master *)sch->ops;
175         struct teql_sched_data *q = qdisc_priv(sch);
176
177         if (dev->hard_header_len > m->dev->hard_header_len)
178                 return -EINVAL;
179
180         if (m->dev == dev)
181                 return -ELOOP;
182
183         q->m = m;
184
185         skb_queue_head_init(&q->q);
186
187         if (m->slaves) {
188                 if (m->dev->flags & IFF_UP) {
189                         if ((m->dev->flags & IFF_POINTOPOINT &&
190                              !(dev->flags & IFF_POINTOPOINT)) ||
191                             (m->dev->flags & IFF_BROADCAST &&
192                              !(dev->flags & IFF_BROADCAST)) ||
193                             (m->dev->flags & IFF_MULTICAST &&
194                              !(dev->flags & IFF_MULTICAST)) ||
195                             dev->mtu < m->dev->mtu)
196                                 return -EINVAL;
197                 } else {
198                         if (!(dev->flags&IFF_POINTOPOINT))
199                                 m->dev->flags &= ~IFF_POINTOPOINT;
200                         if (!(dev->flags&IFF_BROADCAST))
201                                 m->dev->flags &= ~IFF_BROADCAST;
202                         if (!(dev->flags&IFF_MULTICAST))
203                                 m->dev->flags &= ~IFF_MULTICAST;
204                         if (dev->mtu < m->dev->mtu)
205                                 m->dev->mtu = dev->mtu;
206                 }
207                 q->next = NEXT_SLAVE(m->slaves);
208                 NEXT_SLAVE(m->slaves) = sch;
209         } else {
210                 q->next = sch;
211                 m->slaves = sch;
212                 m->dev->mtu = dev->mtu;
213                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
214         }
215         return 0;
216 }
217
218
219 static int
220 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
221                struct net_device *dev, struct netdev_queue *txq,
222                struct dst_entry *dst)
223 {
224         struct neighbour *n;
225         int err = 0;
226
227         n = dst_neigh_lookup_skb(dst, skb);
228         if (!n)
229                 return -ENOENT;
230
231         if (dst->dev != dev) {
232                 struct neighbour *mn;
233
234                 mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
235                 neigh_release(n);
236                 if (IS_ERR(mn))
237                         return PTR_ERR(mn);
238                 n = mn;
239         }
240
241         if (neigh_event_send(n, skb_res) == 0) {
242                 int err;
243                 char haddr[MAX_ADDR_LEN];
244
245                 neigh_ha_snapshot(haddr, n, dev);
246                 err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
247                                       haddr, NULL, skb->len);
248
249                 if (err < 0)
250                         err = -EINVAL;
251         } else {
252                 err = (skb_res == NULL) ? -EAGAIN : 1;
253         }
254         neigh_release(n);
255         return err;
256 }
257
258 static inline int teql_resolve(struct sk_buff *skb,
259                                struct sk_buff *skb_res,
260                                struct net_device *dev,
261                                struct netdev_queue *txq)
262 {
263         struct dst_entry *dst = skb_dst(skb);
264         int res;
265
266         if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
267                 return -ENODEV;
268
269         if (!dev->header_ops || !dst)
270                 return 0;
271
272         rcu_read_lock();
273         res = __teql_resolve(skb, skb_res, dev, txq, dst);
274         rcu_read_unlock();
275
276         return res;
277 }
278
279 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
280 {
281         struct teql_master *master = netdev_priv(dev);
282         struct Qdisc *start, *q;
283         int busy;
284         int nores;
285         int subq = skb_get_queue_mapping(skb);
286         struct sk_buff *skb_res = NULL;
287
288         start = master->slaves;
289
290 restart:
291         nores = 0;
292         busy = 0;
293
294         q = start;
295         if (!q)
296                 goto drop;
297
298         do {
299                 struct net_device *slave = qdisc_dev(q);
300                 struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
301
302                 if (slave_txq->qdisc_sleeping != q)
303                         continue;
304                 if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
305                     !netif_running(slave)) {
306                         busy = 1;
307                         continue;
308                 }
309
310                 switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
311                 case 0:
312                         if (__netif_tx_trylock(slave_txq)) {
313                                 unsigned int length = qdisc_pkt_len(skb);
314
315                                 if (!netif_xmit_frozen_or_stopped(slave_txq) &&
316                                     netdev_start_xmit(skb, slave, slave_txq, false) ==
317                                     NETDEV_TX_OK) {
318                                         __netif_tx_unlock(slave_txq);
319                                         master->slaves = NEXT_SLAVE(q);
320                                         netif_wake_queue(dev);
321                                         master->tx_packets++;
322                                         master->tx_bytes += length;
323                                         return NETDEV_TX_OK;
324                                 }
325                                 __netif_tx_unlock(slave_txq);
326                         }
327                         if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
328                                 busy = 1;
329                         break;
330                 case 1:
331                         master->slaves = NEXT_SLAVE(q);
332                         return NETDEV_TX_OK;
333                 default:
334                         nores = 1;
335                         break;
336                 }
337                 __skb_pull(skb, skb_network_offset(skb));
338         } while ((q = NEXT_SLAVE(q)) != start);
339
340         if (nores && skb_res == NULL) {
341                 skb_res = skb;
342                 goto restart;
343         }
344
345         if (busy) {
346                 netif_stop_queue(dev);
347                 return NETDEV_TX_BUSY;
348         }
349         master->tx_errors++;
350
351 drop:
352         master->tx_dropped++;
353         dev_kfree_skb(skb);
354         return NETDEV_TX_OK;
355 }
356
357 static int teql_master_open(struct net_device *dev)
358 {
359         struct Qdisc *q;
360         struct teql_master *m = netdev_priv(dev);
361         int mtu = 0xFFFE;
362         unsigned int flags = IFF_NOARP | IFF_MULTICAST;
363
364         if (m->slaves == NULL)
365                 return -EUNATCH;
366
367         flags = FMASK;
368
369         q = m->slaves;
370         do {
371                 struct net_device *slave = qdisc_dev(q);
372
373                 if (slave == NULL)
374                         return -EUNATCH;
375
376                 if (slave->mtu < mtu)
377                         mtu = slave->mtu;
378                 if (slave->hard_header_len > LL_MAX_HEADER)
379                         return -EINVAL;
380
381                 /* If all the slaves are BROADCAST, master is BROADCAST
382                    If all the slaves are PtP, master is PtP
383                    Otherwise, master is NBMA.
384                  */
385                 if (!(slave->flags&IFF_POINTOPOINT))
386                         flags &= ~IFF_POINTOPOINT;
387                 if (!(slave->flags&IFF_BROADCAST))
388                         flags &= ~IFF_BROADCAST;
389                 if (!(slave->flags&IFF_MULTICAST))
390                         flags &= ~IFF_MULTICAST;
391         } while ((q = NEXT_SLAVE(q)) != m->slaves);
392
393         m->dev->mtu = mtu;
394         m->dev->flags = (m->dev->flags&~FMASK) | flags;
395         netif_start_queue(m->dev);
396         return 0;
397 }
398
/* Take the master device down: stop its transmit queue.  Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
404
405 static void teql_master_stats64(struct net_device *dev,
406                                 struct rtnl_link_stats64 *stats)
407 {
408         struct teql_master *m = netdev_priv(dev);
409
410         stats->tx_packets       = m->tx_packets;
411         stats->tx_bytes         = m->tx_bytes;
412         stats->tx_errors        = m->tx_errors;
413         stats->tx_dropped       = m->tx_dropped;
414 }
415
416 static int teql_master_mtu(struct net_device *dev, int new_mtu)
417 {
418         struct teql_master *m = netdev_priv(dev);
419         struct Qdisc *q;
420
421         q = m->slaves;
422         if (q) {
423                 do {
424                         if (new_mtu > qdisc_dev(q)->mtu)
425                                 return -EINVAL;
426                 } while ((q = NEXT_SLAVE(q)) != m->slaves);
427         }
428
429         dev->mtu = new_mtu;
430         return 0;
431 }
432
433 static const struct net_device_ops teql_netdev_ops = {
434         .ndo_open       = teql_master_open,
435         .ndo_stop       = teql_master_close,
436         .ndo_start_xmit = teql_master_xmit,
437         .ndo_get_stats64 = teql_master_stats64,
438         .ndo_change_mtu = teql_master_mtu,
439 };
440
441 static __init void teql_master_setup(struct net_device *dev)
442 {
443         struct teql_master *master = netdev_priv(dev);
444         struct Qdisc_ops *ops = &master->qops;
445
446         master->dev     = dev;
447         ops->priv_size  = sizeof(struct teql_sched_data);
448
449         ops->enqueue    =       teql_enqueue;
450         ops->dequeue    =       teql_dequeue;
451         ops->peek       =       teql_peek;
452         ops->init       =       teql_qdisc_init;
453         ops->reset      =       teql_reset;
454         ops->destroy    =       teql_destroy;
455         ops->owner      =       THIS_MODULE;
456
457         dev->netdev_ops =       &teql_netdev_ops;
458         dev->type               = ARPHRD_VOID;
459         dev->mtu                = 1500;
460         dev->min_mtu            = 68;
461         dev->max_mtu            = 65535;
462         dev->tx_queue_len       = 100;
463         dev->flags              = IFF_NOARP;
464         dev->hard_header_len    = LL_MAX_HEADER;
465         netif_keep_dst(dev);
466 }
467
468 static LIST_HEAD(master_dev_list);
469 static int max_equalizers = 1;
470 module_param(max_equalizers, int, 0);
471 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
472
473 static int __init teql_init(void)
474 {
475         int i;
476         int err = -ENODEV;
477
478         for (i = 0; i < max_equalizers; i++) {
479                 struct net_device *dev;
480                 struct teql_master *master;
481
482                 dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
483                                    NET_NAME_UNKNOWN, teql_master_setup);
484                 if (!dev) {
485                         err = -ENOMEM;
486                         break;
487                 }
488
489                 if ((err = register_netdev(dev))) {
490                         free_netdev(dev);
491                         break;
492                 }
493
494                 master = netdev_priv(dev);
495
496                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
497                 err = register_qdisc(&master->qops);
498
499                 if (err) {
500                         unregister_netdev(dev);
501                         free_netdev(dev);
502                         break;
503                 }
504
505                 list_add_tail(&master->master_list, &master_dev_list);
506         }
507         return i ? 0 : err;
508 }
509
510 static void __exit teql_exit(void)
511 {
512         struct teql_master *master, *nxt;
513
514         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
515
516                 list_del(&master->master_list);
517
518                 unregister_qdisc(&master->qops);
519                 unregister_netdev(master->dev);
520                 free_netdev(master->dev);
521         }
522 }
523
524 module_init(teql_init);
525 module_exit(teql_exit);
526
527 MODULE_LICENSE("GPL");