/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up
   ----------------

   After loading this module you will find a new device, teqlN, and a
   new qdisc with the same name. To attach a slave to the equalizer,
   simply install this qdisc as the slave device's root qdisc, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0
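
   Once slaves are attached, the master is brought up and addressed
   like any other interface (the address below is illustrative only):

   # ip link set teql0 up
   # ip addr add 10.0.0.1/24 dev teql0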

   That's all. Full PnP 8)

   Applicability
   -------------

   1. Slave devices MUST be active devices, i.e., they must raise the
      tbusy signal and generate EOI events. If you want to equalize
      virtual devices such as tunnels, use a normal eql device instead.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb Ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable because of massive packet reordering.
      I estimate the useful upper bound on the speed ratio at about 10:1.
   3. If a slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols may still use the slave device directly, which
      will not break the load balancing, though native slave traffic
      will have the highest priority.  */

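/* One teql_master exists for each teqlN master device. Its Qdisc_ops
 * is embedded here so that every master registers a qdisc type named
 * after its own device (teql0, teql1, ...). */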
struct teql_master
{
        struct Qdisc_ops qops;
        struct net_device *dev;
        struct Qdisc *slaves;
        struct list_head master_list;
        struct net_device_stats stats;
};

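/* Per-slave qdisc private data. The slave qdiscs of one master form a
 * circular list linked through ->next; ->ncache caches the neighbour
 * entry most recently resolved on this slave. */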
struct teql_sched_data
{
        struct Qdisc *next;
        struct teql_master *m;
        struct neighbour *ncache;
        struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

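/* Enqueue on this slave's private queue, bounded by the slave
 * device's tx_queue_len; anything beyond that is dropped. */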
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct net_device *dev = sch->dev;
        struct teql_sched_data *q = qdisc_priv(sch);

        if (q->q.qlen < dev->tx_queue_len) {
                __skb_queue_tail(&q->q, skb);
                sch->bstats.bytes += skb->len;
                sch->bstats.packets++;
                return 0;
        }

        kfree_skb(skb);
        sch->qstats.drops++;
        return NET_XMIT_DROP;
}

static int
teql_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct teql_sched_data *q = qdisc_priv(sch);

        __skb_queue_head(&q->q, skb);
        sch->qstats.requeues++;
        return 0;
}

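/* Hand back a queued packet. When this slave runs dry, record it as
 * the next slave to serve and wake the master queue; the reported
 * qlen is the sum of the slave's and the master qdisc's backlogs. */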
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct sk_buff *skb;

        skb = __skb_dequeue(&dat->q);
        if (skb == NULL) {
                struct net_device *m = dat->m->dev->qdisc->dev;
                if (m) {
                        dat->m->slaves = sch;
                        netif_wake_queue(m);
                }
        }
        sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
        return skb;
}

static __inline__ void
teql_neigh_release(struct neighbour *n)
{
        if (n)
                neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);

        skb_queue_purge(&dat->q);
        sch->q.qlen = 0;
        teql_neigh_release(xchg(&dat->ncache, NULL));
}

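/* Unlink this qdisc from the master's circular slave list; if it was
 * the last slave, reset the master's own qdisc as well. */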
static void
teql_destroy(struct Qdisc *sch)
{
        struct Qdisc *q, *prev;
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct teql_master *master = dat->m;

        if ((prev = master->slaves) != NULL) {
                do {
                        q = NEXT_SLAVE(prev);
                        if (q == sch) {
                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
                                if (q == master->slaves) {
                                        master->slaves = NEXT_SLAVE(q);
                                        if (q == master->slaves) {
                                                master->slaves = NULL;
                                                spin_lock_bh(&master->dev->queue_lock);
                                                qdisc_reset(master->dev->qdisc);
                                                spin_unlock_bh(&master->dev->queue_lock);
                                        }
                                }
                                skb_queue_purge(&dat->q);
                                teql_neigh_release(xchg(&dat->ncache, NULL));
                                break;
                        }

                } while ((prev = q) != master->slaves);
        }
}

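/* Attach a new slave: sanity-check it against the master (header room,
 * MTU, link flags) and splice its qdisc into the circular slave list.
 * The first slave imprints its own MTU and flags on the master. */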
static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
{
        struct net_device *dev = sch->dev;
        struct teql_master *m = (struct teql_master *)sch->ops;
        struct teql_sched_data *q = qdisc_priv(sch);

        if (dev->hard_header_len > m->dev->hard_header_len)
                return -EINVAL;

        if (m->dev == dev)
                return -ELOOP;

        q->m = m;

        skb_queue_head_init(&q->q);

        if (m->slaves) {
                if (m->dev->flags & IFF_UP) {
                        if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
                            || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
                            || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
                            || dev->mtu < m->dev->mtu)
                                return -EINVAL;
                } else {
                        if (!(dev->flags&IFF_POINTOPOINT))
                                m->dev->flags &= ~IFF_POINTOPOINT;
                        if (!(dev->flags&IFF_BROADCAST))
                                m->dev->flags &= ~IFF_BROADCAST;
                        if (!(dev->flags&IFF_MULTICAST))
                                m->dev->flags &= ~IFF_MULTICAST;
                        if (dev->mtu < m->dev->mtu)
                                m->dev->mtu = dev->mtu;
                }
                q->next = NEXT_SLAVE(m->slaves);
                NEXT_SLAVE(m->slaves) = sch;
        } else {
                q->next = sch;
                m->slaves = sch;
                m->dev->mtu = dev->mtu;
                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
        }
        return 0;
}


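/* Build the link-layer header for skb on this slave, reusing the
 * cached neighbour entry when it matches the route's neighbour.
 * Returns 0 if the header was filled in, 1 if skb_res was taken by
 * the neighbour layer pending resolution, and a negative errno
 * (notably -EAGAIN on the first pass) otherwise. */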
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
        struct teql_sched_data *q = qdisc_priv(dev->qdisc);
        struct neighbour *mn = skb->dst->neighbour;
        struct neighbour *n = q->ncache;

        if (mn->tbl == NULL)
                return -EINVAL;
        if (n && n->tbl == mn->tbl &&
            memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
                atomic_inc(&n->refcnt);
        } else {
                n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
                if (IS_ERR(n))
                        return PTR_ERR(n);
        }
        if (neigh_event_send(n, skb_res) == 0) {
                int err;

                read_lock(&n->lock);
                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                      n->ha, NULL, skb->len);
                read_unlock(&n->lock);

                if (err < 0) {
                        neigh_release(n);
                        return -EINVAL;
                }
                teql_neigh_release(xchg(&q->ncache, n));
                return 0;
        }
        neigh_release(n);
        return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
                               struct sk_buff *skb_res, struct net_device *dev)
{
        if (dev->header_ops == NULL ||
            skb->dst == NULL ||
            skb->dst->neighbour == NULL)
                return 0;
        return __teql_resolve(skb, skb_res, dev);
}

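/* Transmit on behalf of the master: walk the circular slave list
 * round-robin, starting from the last slave that made progress, and
 * hand the packet to the first slave that can take it. If every
 * usable slave is stopped, stop the master queue and report busy. */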
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc *start, *q;
        int busy;
        int nores;
        int len = skb->len;
        int subq = skb_get_queue_mapping(skb);
        struct sk_buff *skb_res = NULL;

        start = master->slaves;

restart:
        nores = 0;
        busy = 0;

        if ((q = start) == NULL)
                goto drop;

        do {
                struct net_device *slave = q->dev;

                if (slave->qdisc_sleeping != q)
                        continue;
                if (netif_queue_stopped(slave) ||
                    __netif_subqueue_stopped(slave, subq) ||
                    !netif_running(slave)) {
                        busy = 1;
                        continue;
                }

                switch (teql_resolve(skb, skb_res, slave)) {
                case 0:
                        if (netif_tx_trylock(slave)) {
                                if (!netif_queue_stopped(slave) &&
                                    !__netif_subqueue_stopped(slave, subq) &&
                                    slave->hard_start_xmit(skb, slave) == 0) {
                                        netif_tx_unlock(slave);
                                        master->slaves = NEXT_SLAVE(q);
                                        netif_wake_queue(dev);
                                        master->stats.tx_packets++;
                                        master->stats.tx_bytes += len;
                                        return 0;
                                }
                                netif_tx_unlock(slave);
                        }
                        if (netif_queue_stopped(dev))
                                busy = 1;
                        break;
                case 1:
                        master->slaves = NEXT_SLAVE(q);
                        return 0;
                default:
                        nores = 1;
                        break;
                }
                __skb_pull(skb, skb_network_offset(skb));
        } while ((q = NEXT_SLAVE(q)) != start);

        if (nores && skb_res == NULL) {
                skb_res = skb;
                goto restart;
        }

        if (busy) {
                netif_stop_queue(dev);
                return 1;
        }
        master->stats.tx_errors++;

drop:
        master->stats.tx_dropped++;
        dev_kfree_skb(skb);
        return 0;
}

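/* Bring the master up: compute its MTU as the minimum over all slaves
 * and derive its link flags (broadcast/PtP/multicast only if every
 * slave has the corresponding capability). Fails if no slave has been
 * attached yet. */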
static int teql_master_open(struct net_device *dev)
{
        struct Qdisc *q;
        struct teql_master *m = netdev_priv(dev);
        int mtu = 0xFFFE;
        unsigned flags = FMASK;

        if (m->slaves == NULL)
                return -EUNATCH;

        q = m->slaves;
        do {
                struct net_device *slave = q->dev;

                if (slave == NULL)
                        return -EUNATCH;

                if (slave->mtu < mtu)
                        mtu = slave->mtu;
                if (slave->hard_header_len > LL_MAX_HEADER)
                        return -EINVAL;

                /* If all the slaves are BROADCAST, master is BROADCAST
                   If all the slaves are PtP, master is PtP
                   Otherwise, master is NBMA.
                 */
                if (!(slave->flags&IFF_POINTOPOINT))
                        flags &= ~IFF_POINTOPOINT;
                if (!(slave->flags&IFF_BROADCAST))
                        flags &= ~IFF_BROADCAST;
                if (!(slave->flags&IFF_MULTICAST))
                        flags &= ~IFF_MULTICAST;
        } while ((q = NEXT_SLAVE(q)) != m->slaves);

        m->dev->mtu = mtu;
        m->dev->flags = (m->dev->flags&~FMASK) | flags;
        netif_start_queue(m->dev);
        return 0;
}

static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);
        return 0;
}

static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
        struct teql_master *m = netdev_priv(dev);
        return &m->stats;
}

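/* The master's MTU may be lowered freely (down to the IPv4 minimum of
 * 68) but can never exceed the MTU of any attached slave. */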
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
        struct teql_master *m = netdev_priv(dev);
        struct Qdisc *q;

        if (new_mtu < 68)
                return -EINVAL;

        q = m->slaves;
        if (q) {
                do {
                        if (new_mtu > q->dev->mtu)
                                return -EINVAL;
                } while ((q = NEXT_SLAVE(q)) != m->slaves);
        }

        dev->mtu = new_mtu;
        return 0;
}

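/* Wire up the master net_device and its embedded qdisc ops. The
 * device is ARPHRD_VOID and IFF_NOARP: the master never resolves
 * addresses itself; that happens per-slave in teql_resolve(). */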
static __init void teql_master_setup(struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc_ops *ops = &master->qops;

        master->dev     = dev;
        ops->priv_size  = sizeof(struct teql_sched_data);

        ops->enqueue    =       teql_enqueue;
        ops->dequeue    =       teql_dequeue;
        ops->requeue    =       teql_requeue;
        ops->init       =       teql_qdisc_init;
        ops->reset      =       teql_reset;
        ops->destroy    =       teql_destroy;
        ops->owner      =       THIS_MODULE;

        dev->open               = teql_master_open;
        dev->hard_start_xmit    = teql_master_xmit;
        dev->stop               = teql_master_close;
        dev->get_stats          = teql_master_stats;
        dev->change_mtu         = teql_master_mtu;
        dev->type               = ARPHRD_VOID;
        dev->mtu                = 1500;
        dev->tx_queue_len       = 100;
        dev->flags              = IFF_NOARP;
        dev->hard_header_len    = LL_MAX_HEADER;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

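/* Create max_equalizers master devices (teql0, teql1, ...), each with
 * its own qdisc type registered under the device's name, e.g.
 *
 *   # modprobe sch_teql max_equalizers=2
 *
 * creates teql0 and teql1. Init succeeds if at least one master was
 * registered. */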
static int __init teql_init(void)
{
        int i;
        int err = -ENODEV;

        for (i = 0; i < max_equalizers; i++) {
                struct net_device *dev;
                struct teql_master *master;

                dev = alloc_netdev(sizeof(struct teql_master),
                                   "teql%d", teql_master_setup);
                if (!dev) {
                        err = -ENOMEM;
                        break;
                }

                if ((err = register_netdev(dev))) {
                        free_netdev(dev);
                        break;
                }

                master = netdev_priv(dev);

                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
                err = register_qdisc(&master->qops);

                if (err) {
                        unregister_netdev(dev);
                        free_netdev(dev);
                        break;
                }

                list_add_tail(&master->master_list, &master_dev_list);
        }
        return i ? 0 : err;
}

static void __exit teql_exit(void)
{
        struct teql_master *master, *nxt;

        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

                list_del(&master->master_list);

                unregister_qdisc(&master->qops);
                unregister_netdev(master->dev);
                free_netdev(master->dev);
        }
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");