/*
 *      IP multicast routing support for mrouted 3.6/3.8
 *
 *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *        Linux Consultancy and Custom Driver Development
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Fixes:
 *      Michael Chastain        :       Incorrect size of copying.
 *      Alan Cox                :       Added the cache manager code
 *      Alan Cox                :       Fixed the clone/copy bug and device race.
 *      Mike McLagan            :       Routing by source
 *      Malcolm Beattie         :       Buffer handling fixes.
 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
 *      SVR Anand               :       Fixed several multicast bugs and problems.
 *      Alexey Kuznetsov        :       Status, optimisations and more.
 *      Brad Parker             :       Better behaviour on mrouted upcall
 *                                      overflow.
 *      Carlos Picoto           :       PIMv1 Support
 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
 *                                      Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

/* Big lock, protecting the vif table, mrt cache and mroute socket state.
   Note that the changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *      Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */
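
/*
 * Locking, summarized (an editor's sketch inferred from the uses below,
 * not an authoritative statement of the rules):
 *
 *      rtnl_lock                       configuration changes, process context
 *        write_lock_bh(&mrt_lock)      publish vif/MFC table updates
 *      read_lock(&mrt_lock)            data path lookups
 *      spin_lock_bh(&mfc_unres_lock)   unresolved queue, shared with the
 *                                      expiry timer
 */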

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
        struct net *net = dev_net(dev);

        dev_close(dev);

        dev = __dev_get_by_name(net, "tunl0");
        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                struct ifreq ifr;
                struct ip_tunnel_parm p;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
                        set_fs(oldfs);
                }
        }
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
        struct net_device  *dev;

        dev = __dev_get_by_name(net, "tunl0");

        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                int err;
                struct ifreq ifr;
                struct ip_tunnel_parm p;
                struct in_device  *in_dev;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
                        set_fs(oldfs);
                } else
                        err = -EOPNOTSUPP;

                dev = NULL;

                if (err == 0 &&
                    (dev = __dev_get_by_name(net, p.name)) != NULL) {
                        dev->flags |= IFF_MULTICAST;

                        in_dev = __in_dev_get_rtnl(dev);
                        if (in_dev == NULL)
                                goto failure;

                        ipv4_devconf_setall(in_dev);
                        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

                        if (dev_open(dev))
                                goto failure;
                        dev_hold(dev);
                }
        }
        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}

#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct net *net = dev_net(dev);

        read_lock(&mrt_lock);
        dev->stats.tx_bytes += skb->len;
        dev->stats.tx_packets++;
        ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
                          IGMPMSG_WHOLEPKT);
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
        .ndo_start_xmit = reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
        dev->type               = ARPHRD_PIMREG;
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
        dev->flags              = IFF_NOARP;
        dev->netdev_ops         = &reg_vif_netdev_ops;
        dev->destructor         = free_netdev;
        dev->features           |= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net)
{
        struct net_device *dev;
        struct in_device *in_dev;

        dev = alloc_netdev(0, "pimreg", reg_vif_setup);

        if (dev == NULL)
                return NULL;

        dev_net_set(dev, net);

        if (register_netdevice(dev)) {
                free_netdev(dev);
                return NULL;
        }
        dev->iflink = 0;

        rcu_read_lock();
        if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
                rcu_read_unlock();
                goto failure;
        }

        ipv4_devconf_setall(in_dev);
        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
        rcu_read_unlock();

        if (dev_open(dev))
                goto failure;

        dev_hold(dev);

        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}
#endif

/*
 *      Delete a VIF entry
 *      @notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify,
                      struct list_head *head)
{
        struct vif_device *v;
        struct net_device *dev;
        struct in_device *in_dev;

        if (vifi < 0 || vifi >= net->ipv4.maxvif)
                return -EADDRNOTAVAIL;

        v = &net->ipv4.vif_table[vifi];

        write_lock_bh(&mrt_lock);
        dev = v->dev;
        v->dev = NULL;

        if (!dev) {
                write_unlock_bh(&mrt_lock);
                return -EADDRNOTAVAIL;
        }

#ifdef CONFIG_IP_PIMSM
        if (vifi == net->ipv4.mroute_reg_vif_num)
                net->ipv4.mroute_reg_vif_num = -1;
#endif

        if (vifi+1 == net->ipv4.maxvif) {
                int tmp;
                for (tmp=vifi-1; tmp>=0; tmp--) {
                        if (VIF_EXISTS(net, tmp))
                                break;
                }
                net->ipv4.maxvif = tmp+1;
        }

        write_unlock_bh(&mrt_lock);

        dev_set_allmulti(dev, -1);

        if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
                IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
                ip_rt_multicast_event(in_dev);
        }

        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
                unregister_netdevice_queue(dev, head);

        dev_put(dev);
        return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
        release_net(mfc_net(c));
        kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;
        struct net *net = mfc_net(c);

        atomic_dec(&net->ipv4.cache_resolve_queue_len);

        while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
                        nlh->nlmsg_type = NLMSG_ERROR;
                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                        skb_trim(skb, nlh->nlmsg_len);
                        e = NLMSG_DATA(nlh);
                        e->error = -ETIMEDOUT;
                        memset(&e->msg, 0, sizeof(e->msg));

                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
                } else
                        kfree_skb(skb);
        }

        ipmr_cache_free(c);
}


/* A single timer process serves the whole unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
        unsigned long now;
        unsigned long expires;
        struct mfc_cache *c, **cp;

        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
                return;
        }

        if (mfc_unres_queue == NULL)
                goto out;

        now = jiffies;
        expires = 10*HZ;
        cp = &mfc_unres_queue;

        while ((c=*cp) != NULL) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        unsigned long interval = c->mfc_un.unres.expires - now;
                        if (interval < expires)
                                expires = interval;
                        cp = &c->next;
                        continue;
                }

                *cp = c->next;

                ipmr_destroy_unres(c);
        }

        if (mfc_unres_queue != NULL)
                mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
        spin_unlock(&mfc_unres_lock);
}

/* Fill the oifs list. Called with mrt_lock write-locked. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
        int vifi;
        struct net *net = mfc_net(cache);

        cache->mfc_un.res.minvif = MAXVIFS;
        cache->mfc_un.res.maxvif = 0;
        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

        for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
                if (VIF_EXISTS(net, vifi) &&
                    ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
                                cache->mfc_un.res.minvif = vifi;
                        if (cache->mfc_un.res.maxvif <= vifi)
                                cache->mfc_un.res.maxvif = vifi + 1;
                }
        }
}

static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
        int vifi = vifc->vifc_vifi;
        struct vif_device *v = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct in_device *in_dev;
        int err;

        /* Is vif busy ? */
        if (VIF_EXISTS(net, vifi))
                return -EADDRINUSE;

        switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
        case VIFF_REGISTER:
                /*
                 * Special Purpose VIF in PIM
                 * All the packets will be sent to the daemon
                 */
                if (net->ipv4.mroute_reg_vif_num >= 0)
                        return -EADDRINUSE;
                dev = ipmr_reg_vif(net);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        unregister_netdevice(dev);
                        dev_put(dev);
                        return err;
                }
                break;
#endif
        case VIFF_TUNNEL:
                dev = ipmr_new_tunnel(net, vifc);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        ipmr_del_tunnel(dev, vifc);
                        dev_put(dev);
                        return err;
                }
                break;

        case VIFF_USE_IFINDEX:
        case 0:
                if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
                        dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
                        if (dev && dev->ip_ptr == NULL) {
                                dev_put(dev);
                                return -EADDRNOTAVAIL;
                        }
                } else
                        dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

                if (!dev)
                        return -EADDRNOTAVAIL;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        dev_put(dev);
                        return err;
                }
                break;
        default:
                return -EINVAL;
        }

        if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
                dev_put(dev);
                return -EADDRNOTAVAIL;
        }
        IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
        ip_rt_multicast_event(in_dev);

        /*
         *      Fill in the VIF structures
         */
        v->rate_limit = vifc->vifc_rate_limit;
        v->local = vifc->vifc_lcl_addr.s_addr;
        v->remote = vifc->vifc_rmt_addr.s_addr;
        v->flags = vifc->vifc_flags;
        if (!mrtsock)
                v->flags |= VIFF_STATIC;
        v->threshold = vifc->vifc_threshold;
        v->bytes_in = 0;
        v->bytes_out = 0;
        v->pkt_in = 0;
        v->pkt_out = 0;
        v->link = dev->ifindex;
        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
                v->link = dev->iflink;

        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        v->dev = dev;
#ifdef CONFIG_IP_PIMSM
        if (v->flags&VIFF_REGISTER)
                net->ipv4.mroute_reg_vif_num = vifi;
#endif
        if (vifi+1 > net->ipv4.maxvif)
                net->ipv4.maxvif = vifi+1;
        write_unlock_bh(&mrt_lock);
        return 0;
}

static struct mfc_cache *ipmr_cache_find(struct net *net,
                                         __be32 origin,
                                         __be32 mcastgrp)
{
        int line = MFC_HASH(mcastgrp, origin);
        struct mfc_cache *c;

        for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
                if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
                        break;
        }
        return c;
}

/*
 *      Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
        if (c == NULL)
                return NULL;
        c->mfc_un.res.minvif = MAXVIFS;
        mfc_net_set(c, net);
        return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
        if (c == NULL)
                return NULL;
        skb_queue_head_init(&c->mfc_un.unres.unresolved);
        c->mfc_un.unres.expires = jiffies + 10*HZ;
        mfc_net_set(c, net);
        return c;
}

/*
 *      A cache entry has gone from queued to resolved
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;

        /*
         *      Play the pending entries through our router
         */

        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

                        if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
                                nlh->nlmsg_len = (skb_tail_pointer(skb) -
                                                  (u8 *)nlh);
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                                skb_trim(skb, nlh->nlmsg_len);
                                e = NLMSG_DATA(nlh);
                                e->error = -EMSGSIZE;
                                memset(&e->msg, 0, sizeof(e->msg));
                        }

                        rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
                } else
                        ip_mr_forward(skb, c, 0);
        }
}

/*
 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *      expects the following bizarre scheme.
 *
 *      Called under mrt_lock.
 */
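
/*
 * Userspace side, sketched for context (this is how mrouted-style daemons
 * are commonly written; an illustration rather than a guarantee of this
 * file's ABI): the report below arrives on the daemon's raw IGMP socket,
 * and the leading bytes are interpreted as a struct igmpmsg.
 *
 *      char buf[8192];
 *      ssize_t n = read(igmp_fd, buf, sizeof(buf));
 *      struct igmpmsg *im = (struct igmpmsg *)buf;
 *
 *      if (n > 0 && im->im_mbz == 0) {    // mbz == 0 marks a kernel upcall
 *              switch (im->im_msgtype) {
 *              case IGMPMSG_NOCACHE:      // resolve (im->im_src, im->im_dst),
 *                                         // then install via MRT_ADD_MFC
 *              case IGMPMSG_WRONGVIF:     // PIM assert processing
 *              case IGMPMSG_WHOLEPKT:     // PIM register handling
 *                      break;
 *              }
 *      }
 */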

static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert)
{
        struct sk_buff *skb;
        const int ihl = ip_hdrlen(pkt);
        struct igmphdr *igmp;
        struct igmpmsg *msg;
        int ret;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
#endif
                skb = alloc_skb(128, GFP_ATOMIC);

        if (!skb)
                return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix ihl, length etc.
                   And all this only to mangle msg->im_msgtype and
                   to set msg->im_mbz to "mbz" :-)
                 */
                skb_push(skb, sizeof(struct iphdr));
                skb_reset_network_header(skb);
                skb_reset_transport_header(skb);
                msg = (struct igmpmsg *)skb_network_header(skb);
                memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
                msg->im_mbz = 0;
                msg->im_vif = net->ipv4.mroute_reg_vif_num;
                ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
                ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
                                             sizeof(struct iphdr));
        } else
#endif
        {

        /*
         *      Copy the IP header
         */

        skb->network_header = skb->tail;
        skb_put(skb, ihl);
        skb_copy_to_linear_data(skb, pkt->data, ihl);
        ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
        msg = (struct igmpmsg *)skb_network_header(skb);
        msg->im_vif = vifi;
        skb_dst_set(skb, dst_clone(skb_dst(pkt)));

        /*
         *      Add our header
         */

        igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
        igmp->type      =
        msg->im_msgtype = assert;
        igmp->code      =       0;
        ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
        skb->transport_header = skb->network_header;
        }

        if (net->ipv4.mroute_sk == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /*
         *      Deliver to mrouted
         */
        ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
        if (ret < 0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
                kfree_skb(skb);
        }

        return ret;
}

/*
 *      Queue a packet for resolution. It gets a locked cache entry!
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
        int err;
        struct mfc_cache *c;
        const struct iphdr *iph = ip_hdr(skb);

        spin_lock_bh(&mfc_unres_lock);
        for (c=mfc_unres_queue; c; c=c->next) {
                if (net_eq(mfc_net(c), net) &&
                    c->mfc_mcastgrp == iph->daddr &&
                    c->mfc_origin == iph->saddr)
                        break;
        }

        if (c == NULL) {
                /*
                 *      Create a new entry if allowable
                 */

                if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
                    (c = ipmr_cache_alloc_unres(net)) == NULL) {
                        spin_unlock_bh(&mfc_unres_lock);

                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /*
                 *      Fill in the new cache entry
                 */
                c->mfc_parent   = -1;
                c->mfc_origin   = iph->saddr;
                c->mfc_mcastgrp = iph->daddr;

                /*
                 *      Reflect first query at mrouted.
                 */
                err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
                if (err < 0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);

                        ipmr_cache_free(c);
                        kfree_skb(skb);
                        return err;
                }

                atomic_inc(&net->ipv4.cache_resolve_queue_len);
                c->next = mfc_unres_queue;
                mfc_unres_queue = c;

                if (atomic_read(&net->ipv4.cache_resolve_queue_len) == 1)
                        mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
        }

        /*
         *      See if we can append the packet
         */
        if (c->mfc_un.unres.unresolved.qlen>3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
                err = 0;
        }

        spin_unlock_bh(&mfc_unres_lock);
        return err;
}

/*
 *      MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
        int line;
        struct mfc_cache *c, **cp;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp = &net->ipv4.mfc_cache_array[line];
             (c = *cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        ipmr_cache_free(c);
                        return 0;
                }
        }
        return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
        int line;
        struct mfc_cache *uc, *c, **cp;

        if (mfc->mfcc_parent >= MAXVIFS)
                return -ENFILE;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp = &net->ipv4.mfc_cache_array[line];
             (c = *cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
                        break;
        }

        if (c != NULL) {
                write_lock_bh(&mrt_lock);
                c->mfc_parent = mfc->mfcc_parent;
                ipmr_update_thresholds(c, mfc->mfcc_ttls);
                if (!mrtsock)
                        c->mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                return 0;
        }

        if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
                return -EINVAL;

        c = ipmr_cache_alloc(net);
        if (c == NULL)
                return -ENOMEM;

        c->mfc_origin = mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
        c->mfc_parent = mfc->mfcc_parent;
        ipmr_update_thresholds(c, mfc->mfcc_ttls);
        if (!mrtsock)
                c->mfc_flags |= MFC_STATIC;

        write_lock_bh(&mrt_lock);
        c->next = net->ipv4.mfc_cache_array[line];
        net->ipv4.mfc_cache_array[line] = c;
        write_unlock_bh(&mrt_lock);

        /*
         *      Check to see if we resolved a queued list. If so we
         *      need to send on the frames and tidy up.
         */
        spin_lock_bh(&mfc_unres_lock);
        for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
             cp = &uc->next) {
                if (net_eq(mfc_net(uc), net) &&
                    uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
                        *cp = uc->next;
                        atomic_dec(&net->ipv4.cache_resolve_queue_len);
                        break;
                }
        }
        if (mfc_unres_queue == NULL)
                del_timer(&ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);

        if (uc) {
                ipmr_cache_resolve(uc, c);
                ipmr_cache_free(uc);
        }
        return 0;
}

/*
 *      Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
        int i;
        LIST_HEAD(list);

        /*
         *      Shut down all active vif entries
         */
        for (i = 0; i < net->ipv4.maxvif; i++) {
                if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
                        vif_delete(net, i, 0, &list);
        }
        unregister_netdevice_many(&list);

        /*
         *      Wipe the cache
         */
        for (i=0; i<MFC_LINES; i++) {
                struct mfc_cache *c, **cp;

                cp = &net->ipv4.mfc_cache_array[i];
                while ((c = *cp) != NULL) {
                        if (c->mfc_flags&MFC_STATIC) {
                                cp = &c->next;
                                continue;
                        }
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        ipmr_cache_free(c);
                }
        }

        if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
                struct mfc_cache *c, **cp;

                spin_lock_bh(&mfc_unres_lock);
                cp = &mfc_unres_queue;
                while ((c = *cp) != NULL) {
                        if (!net_eq(mfc_net(c), net)) {
                                cp = &c->next;
                                continue;
                        }
                        *cp = c->next;

                        ipmr_destroy_unres(c);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
}

static void mrtsock_destruct(struct sock *sk)
{
        struct net *net = sock_net(sk);

        rtnl_lock();
        if (sk == net->ipv4.mroute_sk) {
                IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

                write_lock_bh(&mrt_lock);
                net->ipv4.mroute_sk = NULL;
                write_unlock_bh(&mrt_lock);

                mroute_clean_tables(net);
        }
        rtnl_unlock();
}

/*
 *      Socket options and virtual interface manipulation. The whole
 *      virtual interface system is a complete heap, but unfortunately
 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 *      MOSPF/PIM router set up we can clean this up.
 */
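
/*
 * Userspace view, sketched for orientation (a typical but hypothetical
 * daemon snippet; names like local_ip, source and group are illustrative,
 * error handling omitted):
 *
 *      int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *      int one = 1;
 *
 *      setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *      struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *      vc.vifc_lcl_addr.s_addr = local_ip;
 *      setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 *      struct mfcctl mc = { .mfcc_parent = 0 };
 *      mc.mfcc_origin.s_addr = source;         // the (S,G) to forward
 *      mc.mfcc_mcastgrp.s_addr = group;
 *      mc.mfcc_ttls[1] = 1;                    // forward out vif 1
 *      setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */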

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
        int ret;
        struct vifctl vif;
        struct mfcctl mfc;
        struct net *net = sock_net(sk);

        if (optname != MRT_INIT) {
                if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
                        return -EACCES;
        }

        switch (optname) {
        case MRT_INIT:
                if (sk->sk_type != SOCK_RAW ||
                    inet_sk(sk)->inet_num != IPPROTO_IGMP)
                        return -EOPNOTSUPP;
                if (optlen != sizeof(int))
                        return -ENOPROTOOPT;

                rtnl_lock();
                if (net->ipv4.mroute_sk) {
                        rtnl_unlock();
                        return -EADDRINUSE;
                }

                ret = ip_ra_control(sk, 1, mrtsock_destruct);
                if (ret == 0) {
                        write_lock_bh(&mrt_lock);
                        net->ipv4.mroute_sk = sk;
                        write_unlock_bh(&mrt_lock);

                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
                }
                rtnl_unlock();
                return ret;
        case MRT_DONE:
                if (sk != net->ipv4.mroute_sk)
                        return -EACCES;
                return ip_ra_control(sk, 0, NULL);
        case MRT_ADD_VIF:
        case MRT_DEL_VIF:
                if (optlen != sizeof(vif))
                        return -EINVAL;
                if (copy_from_user(&vif, optval, sizeof(vif)))
                        return -EFAULT;
                if (vif.vifc_vifi >= MAXVIFS)
                        return -ENFILE;
                rtnl_lock();
                if (optname == MRT_ADD_VIF) {
                        ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
                } else {
                        ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
                }
                rtnl_unlock();
                return ret;

                /*
                 *      Manipulate the forwarding caches. These live
                 *      in a sort of kernel/user symbiosis.
                 */
        case MRT_ADD_MFC:
        case MRT_DEL_MFC:
                if (optlen != sizeof(mfc))
                        return -EINVAL;
                if (copy_from_user(&mfc, optval, sizeof(mfc)))
                        return -EFAULT;
                rtnl_lock();
                if (optname == MRT_DEL_MFC)
                        ret = ipmr_mfc_delete(net, &mfc);
                else
                        ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
                rtnl_unlock();
                return ret;
                /*
                 *      Control PIM assert.
                 */
        case MRT_ASSERT:
        {
                int v;
                if (get_user(v,(int __user *)optval))
                        return -EFAULT;
                net->ipv4.mroute_do_assert = (v) ? 1 : 0;
                return 0;
        }
#ifdef CONFIG_IP_PIMSM
        case MRT_PIM:
        {
                int v;

                if (get_user(v,(int __user *)optval))
                        return -EFAULT;
                v = (v) ? 1 : 0;

                rtnl_lock();
                ret = 0;
                if (v != net->ipv4.mroute_do_pim) {
                        net->ipv4.mroute_do_pim = v;
                        net->ipv4.mroute_do_assert = v;
                }
                rtnl_unlock();
                return ret;
        }
#endif
        /*
         *      Spurious command, or MRT_VERSION which you cannot
         *      set.
         */
        default:
                return -ENOPROTOOPT;
        }
}

/*
 *      Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
        int olr;
        int val;
        struct net *net = sock_net(sk);

        if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
           optname!=MRT_PIM &&
#endif
           optname!=MRT_ASSERT)
                return -ENOPROTOOPT;

        if (get_user(olr, optlen))
                return -EFAULT;

        olr = min_t(unsigned int, olr, sizeof(int));
        if (olr < 0)
                return -EINVAL;

        if (put_user(olr, optlen))
                return -EFAULT;
        if (optname == MRT_VERSION)
                val = 0x0305;
#ifdef CONFIG_IP_PIMSM
        else if (optname == MRT_PIM)
                val = net->ipv4.mroute_do_pim;
#endif
        else
                val = net->ipv4.mroute_do_assert;
        if (copy_to_user(optval, &val, olr))
                return -EFAULT;
        return 0;
}

/*
 *      The IP multicast ioctl support routines.
 */
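
/*
 * Userspace view, sketched for orientation (hypothetical snippet; fd is the
 * mroute socket, source and group are illustrative variables): per-vif and
 * per-(S,G) counters are read with these ioctls.
 *
 *      struct sioc_vif_req vr = { .vifi = 0 };
 *      ioctl(fd, SIOCGETVIFCNT, &vr);  // fills vr.icount, vr.ibytes, ...
 *
 *      struct sioc_sg_req sr;
 *      sr.src.s_addr = source;
 *      sr.grp.s_addr = group;
 *      ioctl(fd, SIOCGETSGCNT, &sr);   // fills sr.pktcnt, sr.bytecnt, ...
 */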

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;
        struct net *net = sock_net(sk);

        switch (cmd) {
        case SIOCGETVIFCNT:
                if (copy_from_user(&vr, arg, sizeof(vr)))
                        return -EFAULT;
                if (vr.vifi >= net->ipv4.maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
                vif = &net->ipv4.vif_table[vr.vifi];
                if (VIF_EXISTS(net, vr.vifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
                        vr.obytes = vif->bytes_out;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &vr, sizeof(vr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        case SIOCGETSGCNT:
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;

                read_lock(&mrt_lock);
                c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
                        sr.pktcnt = c->mfc_un.res.pkt;
                        sr.bytecnt = c->mfc_un.res.bytes;
                        sr.wrong_if = c->mfc_un.res.wrong_if;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
        }
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;
        struct net *net = dev_net(dev);
        struct vif_device *v;
        int ct;
        LIST_HEAD(list);

        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;
        v = &net->ipv4.vif_table[0];
        for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
                if (v->dev == dev)
                        vif_delete(net, ct, 1, &list);
        }
        unregister_netdevice_many(&list);
        return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
        .notifier_call = ipmr_device_event,
};

/*
 *      Encapsulate a packet by attaching a valid IPIP header to it.
 *      This avoids tunnel drivers and other mess and gives us the speed so
 *      important for multicast video.
 */
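
/*
 * Resulting packet layout (a sketch of what the function below builds):
 *
 *      +----------------------------+------------------------------+
 *      | outer iphdr                | original multicast datagram  |
 *      | proto IPPROTO_IPIP,        | (inner iphdr + payload,      |
 *      | saddr/daddr of the tunnel  | left untouched)              |
 *      +----------------------------+------------------------------+
 */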

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
        struct iphdr *iph;
        struct iphdr *old_iph = ip_hdr(skb);

        skb_push(skb, sizeof(struct iphdr));
        skb->transport_header = skb->network_header;
        skb_reset_network_header(skb);
        iph = ip_hdr(skb);

        iph->version    =       4;
        iph->tos        =       old_iph->tos;
        iph->ttl        =       old_iph->ttl;
        iph->frag_off   =       0;
        iph->daddr      =       daddr;
        iph->saddr      =       saddr;
        iph->protocol   =       IPPROTO_IPIP;
        iph->ihl        =       5;
        iph->tot_len    =       htons(skb->len);
        ip_select_ident(iph, skb_dst(skb), NULL);
        ip_send_check(iph);

        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
        struct ip_options * opt = &(IPCB(skb)->opt);

        IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

        if (unlikely(opt->optlen))
                ip_forward_options(skb);

        return dst_output(skb);
}

/*
 *      Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
        struct net *net = mfc_net(c);
        const struct iphdr *iph = ip_hdr(skb);
        struct vif_device *vif = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;

        if (vif->dev == NULL)
                goto out_free;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out += skb->len;
                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;
                ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
                goto out_free;
        }
#endif

        if (vif->flags&VIFF_TUNNEL) {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = vif->remote,
                                                .saddr = vif->local,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
                encap = sizeof(struct iphdr);
        } else {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
        }

        dev = rt->u.dst.dev;

        if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                   allow us to send ICMP here, so such packets simply
                   disappear into a black hole.
                 */

                IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
                ip_rt_put(rt);
                goto out_free;
        }

        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

        if (skb_cow(skb, encap)) {
                ip_rt_put(rt);
                goto out_free;
        }

        vif->pkt_out++;
        vif->bytes_out += skb->len;

        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->u.dst);
        ip_decrease_ttl(ip_hdr(skb));

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                vif->dev->stats.tx_packets++;
                vif->dev->stats.tx_bytes += skb->len;
        }

        IPCB(skb)->flags |= IPSKB_FORWARDED;

        /*
         * RFC1584 teaches that a DVMRP/PIM router must deliver packets
         * locally not only before forwarding, but also after forwarding on
         * all output interfaces. It is clear that if the mrouter runs a
         * multicasting program, it should receive packets regardless of
         * which interface the program joined on.
         * If we do not do this, the program will have to join on all
         * interfaces. On the other hand, a multihoming host (or router,
         * but not mrouter) cannot join on more than one interface - it
         * would result in receiving multiple packets.
         */
        NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
                ipmr_forward_finish);
        return;

out_free:
        kfree_skb(skb);
        return;
}

static int ipmr_find_vif(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        int ct;
        for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
                if (net->ipv4.vif_table[ct].dev == dev)
                        break;
        }
        return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;
        struct net *net = mfc_net(cache);

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (net->ipv4.vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (skb_rtable(skb)->fl.iif == 0) {
                        /* It is our own packet, looped back.
                           Very complicated situation...

                           The best workaround until routing daemons are
                           fixed is not to redistribute a packet if it was
                           sent through the wrong interface. It means that
                           multicast applications WILL NOT work for (S,G)
                           entries whose default multicast route points to
                           the wrong oif. In any case, it is not a good
                           idea to run multicasting applications on a router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
                    /* pimsm uses asserts when switching from RPT to SPT,
                       so we cannot check that a packet arrived on an oif.
                       It is bad, but otherwise we would need to move a
                       pretty large chunk of pimd into the kernel. Ough... --ANK
                     */
                    (net->ipv4.mroute_do_pim ||
                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        net->ipv4.vif_table[vif].pkt_in++;
        net->ipv4.vif_table[vif].bytes_in += skb->len;

        /*
         *      Forward the frame
         */
        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        ipmr_queue_xmit(skb2, cache, psend);
                        }
                        psend = ct;
                }
        }
        if (psend != -1) {
                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (skb2)
                                ipmr_queue_xmit(skb2, cache, psend);
                } else {
                        ipmr_queue_xmit(skb, cache, psend);
                        return 0;
                }
        }

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}


/*
 *      Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        struct net *net = dev_net(skb->dev);
        int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

        /* The packet is looped back after forwarding; it should not be
           forwarded a second time, but it can still be delivered locally.
         */
        if (IPCB(skb)->flags&IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                    if (IPCB(skb)->opt.router_alert) {
                            if (ip_call_ra_chain(skb))
                                    return 0;
                    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
                            /* IGMPv1 (and broken IGMPv2 implementations such as
                               Cisco IOS <= 11.2(8)) do not put the router alert
                               option into IGMP packets destined to routable
                               groups. It is very bad, because it means
                               that we can forward NO IGMP messages.
                             */
                            read_lock(&mrt_lock);
                            if (net->ipv4.mroute_sk) {
                                    nf_reset(skb);
                                    raw_rcv(net->ipv4.mroute_sk, skb);
                                    read_unlock(&mrt_lock);
                                    return 0;
                            }
                            read_unlock(&mrt_lock);
                    }
        }

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

        /*
         *      No usable cache entry
         */
        if (cache == NULL) {
                int vif;

                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        int err = ipmr_cache_unresolved(net, vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        ip_mr_forward(skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
        struct net_device *reg_dev = NULL;
        struct iphdr *encap;
        struct net *net = dev_net(skb->dev);

        encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
        /*
           Check that:
           a. the packet is really destined to a multicast group
           b. the packet is not a NULL-REGISTER
           c. the packet is not truncated
         */
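        /*
         * What is being unwrapped here, sketched from the pointer
         * arithmetic above (PIM-SM register encapsulation):
         *
         *      [ outer iphdr | PIM header (pimlen bytes) | encap ]
         *                                                   ^
         *                           inner multicast iphdr + payload
         */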
1532         if (!ipv4_is_multicast(encap->daddr) ||
1533             encap->tot_len == 0 ||
1534             ntohs(encap->tot_len) + pimlen > skb->len)
1535                 return 1;
1536
1537         read_lock(&mrt_lock);
1538         if (net->ipv4.mroute_reg_vif_num >= 0)
1539                 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1540         if (reg_dev)
1541                 dev_hold(reg_dev);
1542         read_unlock(&mrt_lock);
1543
1544         if (reg_dev == NULL)
1545                 return 1;
1546
1547         skb->mac_header = skb->network_header;
1548         skb_pull(skb, (u8*)encap - skb->data);
1549         skb_reset_network_header(skb);
1550         skb->dev = reg_dev;
1551         skb->protocol = htons(ETH_P_IP);
1552         skb->ip_summed = 0;
1553         skb->pkt_type = PACKET_HOST;
1554         skb_dst_drop(skb);
1555         reg_dev->stats.rx_bytes += skb->len;
1556         reg_dev->stats.rx_packets++;
1557         nf_reset(skb);
1558         netif_rx(skb);
1559         dev_put(reg_dev);
1560
1561         return 0;
1562 }
1563 #endif
1564
1565 #ifdef CONFIG_IP_PIMSM_V1
1566 /*
1567  * Handle PIMv1 register messages, which arrive encapsulated in IGMP
1568  */
1569
1570 int pim_rcv_v1(struct sk_buff *skb)
1571 {
1572         struct igmphdr *pim;
1573         struct net *net = dev_net(skb->dev);
1574
1575         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1576                 goto drop;
1577
1578         pim = igmp_hdr(skb);
1579
1580         if (!net->ipv4.mroute_do_pim ||
1581             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1582                 goto drop;
1583
1584         if (__pim_rcv(skb, sizeof(*pim))) {
1585 drop:
1586                 kfree_skb(skb);
1587         }
1588         return 0;
1589 }
1590 #endif
1591
1592 #ifdef CONFIG_IP_PIMSM_V2
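/* PIMv2 register messages arrive as a native IP protocol. Reject
 * NULL-REGISTERs and packets whose checksum fails both over the PIM
 * header alone and over the whole packet.
 */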
1593 static int pim_rcv(struct sk_buff *skb)
1594 {
1595         struct pimreghdr *pim;
1596
1597         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1598                 goto drop;
1599
1600         pim = (struct pimreghdr *)skb_transport_header(skb);
1601         if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1602             (pim->flags&PIM_NULL_REGISTER) ||
1603             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1604              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1605                 goto drop;
1606
1607         if (__pim_rcv(skb, sizeof(*pim))) {
1608 drop:
1609                 kfree_skb(skb);
1610         }
1611         return 0;
1612 }
1613 #endif
1614
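/* Fill @skb with the incoming interface and an RTA_MULTIPATH list of
 * outgoing interfaces (one rtnexthop per vif with a TTL below 255) for
 * the resolved cache entry @c.
 */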
1615 static int
1616 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1617 {
1618         int ct;
1619         struct rtnexthop *nhp;
1620         struct net *net = mfc_net(c);
1621         u8 *b = skb_tail_pointer(skb);
1622         struct rtattr *mp_head;
1623
1624         /* If cache is unresolved, don't try to parse IIF and OIF */
1625         if (c->mfc_parent >= MAXVIFS)
1626                 return -ENOENT;
1627
1628         if (VIF_EXISTS(net, c->mfc_parent))
1629                 RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);
1630
1631         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1632
1633         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1634                 if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
1635                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1636                                 goto rtattr_failure;
1637                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1638                         nhp->rtnh_flags = 0;
1639                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1640                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1641                         nhp->rtnh_len = sizeof(*nhp);
1642                 }
1643         }
1644         mp_head->rta_type = RTA_MULTIPATH;
1645         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1646         rtm->rtm_type = RTN_MULTICAST;
1647         return 1;
1648
1649 rtattr_failure:
1650         nlmsg_trim(skb, b);
1651         return -EMSGSIZE;
1652 }
1653
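/* rtnetlink interface: report the multicast forwarding state for the
 * route attached to @skb. On a cache miss, build a skeletal IP header
 * and queue the packet as unresolved so the routing daemon is asked to
 * resolve it (unless @nowait is set, in which case return -EAGAIN).
 */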
1654 int ipmr_get_route(struct net *net,
1655                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1656 {
1657         int err;
1658         struct mfc_cache *cache;
1659         struct rtable *rt = skb_rtable(skb);
1660
1661         read_lock(&mrt_lock);
1662         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1663
1664         if (cache == NULL) {
1665                 struct sk_buff *skb2;
1666                 struct iphdr *iph;
1667                 struct net_device *dev;
1668                 int vif;
1669
1670                 if (nowait) {
1671                         read_unlock(&mrt_lock);
1672                         return -EAGAIN;
1673                 }
1674
1675                 dev = skb->dev;
1676                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1677                         read_unlock(&mrt_lock);
1678                         return -ENODEV;
1679                 }
1680                 skb2 = skb_clone(skb, GFP_ATOMIC);
1681                 if (!skb2) {
1682                         read_unlock(&mrt_lock);
1683                         return -ENOMEM;
1684                 }
1685
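                /* Craft a minimal IP header on the clone so that
                 * ipmr_cache_unresolved() can queue it; version 0 marks
                 * the header as synthetic, so ipmr_cache_resolve()
                 * completes it as a netlink reply instead of
                 * forwarding it.
                 */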
1686                 skb_push(skb2, sizeof(struct iphdr));
1687                 skb_reset_network_header(skb2);
1688                 iph = ip_hdr(skb2);
1689                 iph->ihl = sizeof(struct iphdr) >> 2;
1690                 iph->saddr = rt->rt_src;
1691                 iph->daddr = rt->rt_dst;
1692                 iph->version = 0;
1693                 err = ipmr_cache_unresolved(net, vif, skb2);
1694                 read_unlock(&mrt_lock);
1695                 return err;
1696         }
1697
1698         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1699                 cache->mfc_flags |= MFC_NOTIFY;
1700         err = ipmr_fill_mroute(skb, cache, rtm);
1701         read_unlock(&mrt_lock);
1702         return err;
1703 }
1704
1705 #ifdef CONFIG_PROC_FS
1706 /*
1707  *      The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1708  */
1709 struct ipmr_vif_iter {
1710         struct seq_net_private p;
1711         int ct;
1712 };
1713
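/* Return the pos'th live entry in the vif table, skipping holes left
 * by deleted vifs, or NULL when pos runs past the end.
 */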
1714 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1715                                            struct ipmr_vif_iter *iter,
1716                                            loff_t pos)
1717 {
1718         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1719                 if (!VIF_EXISTS(net, iter->ct))
1720                         continue;
1721                 if (pos-- == 0)
1722                         return &net->ipv4.vif_table[iter->ct];
1723         }
1724         return NULL;
1725 }
1726
1727 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1728         __acquires(mrt_lock)
1729 {
1730         struct net *net = seq_file_net(seq);
1731
1732         read_lock(&mrt_lock);
1733         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1734                 : SEQ_START_TOKEN;
1735 }
1736
1737 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1738 {
1739         struct ipmr_vif_iter *iter = seq->private;
1740         struct net *net = seq_file_net(seq);
1741
1742         ++*pos;
1743         if (v == SEQ_START_TOKEN)
1744                 return ipmr_vif_seq_idx(net, iter, 0);
1745
1746         while (++iter->ct < net->ipv4.maxvif) {
1747                 if (!VIF_EXISTS(net, iter->ct))
1748                         continue;
1749                 return &net->ipv4.vif_table[iter->ct];
1750         }
1751         return NULL;
1752 }
1753
1754 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1755         __releases(mrt_lock)
1756 {
1757         read_unlock(&mrt_lock);
1758 }
1759
1760 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1761 {
1762         struct net *net = seq_file_net(seq);
1763
1764         if (v == SEQ_START_TOKEN) {
1765                 seq_puts(seq,
1766                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1767         } else {
1768                 const struct vif_device *vif = v;
1769                 const char *name = vif->dev ? vif->dev->name : "none";
1770
1771                 seq_printf(seq,
1772                            "%2td %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1773                            vif - net->ipv4.vif_table,
1774                            name, vif->bytes_in, vif->pkt_in,
1775                            vif->bytes_out, vif->pkt_out,
1776                            vif->flags, vif->local, vif->remote);
1777         }
1778         return 0;
1779 }
1780
1781 static const struct seq_operations ipmr_vif_seq_ops = {
1782         .start = ipmr_vif_seq_start,
1783         .next  = ipmr_vif_seq_next,
1784         .stop  = ipmr_vif_seq_stop,
1785         .show  = ipmr_vif_seq_show,
1786 };
1787
1788 static int ipmr_vif_open(struct inode *inode, struct file *file)
1789 {
1790         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1791                             sizeof(struct ipmr_vif_iter));
1792 }
1793
1794 static const struct file_operations ipmr_vif_fops = {
1795         .owner   = THIS_MODULE,
1796         .open    = ipmr_vif_open,
1797         .read    = seq_read,
1798         .llseek  = seq_lseek,
1799         .release = seq_release_net,
1800 };
1801
1802 struct ipmr_mfc_iter {
1803         struct seq_net_private p;
1804         struct mfc_cache **cache;
1805         int ct;
1806 };
1807
1808
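/* Position the iterator at the pos'th cache entry. Walks the resolved
 * hash under mrt_lock first, then the shared unresolved queue under
 * mfc_unres_lock; whichever lock is still held when an entry is
 * returned is released later in ipmr_mfc_seq_stop().
 */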
1809 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1810                                           struct ipmr_mfc_iter *it, loff_t pos)
1811 {
1812         struct mfc_cache *mfc;
1813
1814         it->cache = net->ipv4.mfc_cache_array;
1815         read_lock(&mrt_lock);
1816         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1817                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1818                      mfc; mfc = mfc->next)
1819                         if (pos-- == 0)
1820                                 return mfc;
1821         read_unlock(&mrt_lock);
1822
1823         it->cache = &mfc_unres_queue;
1824         spin_lock_bh(&mfc_unres_lock);
1825         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1826                 if (net_eq(mfc_net(mfc), net) &&
1827                     pos-- == 0)
1828                         return mfc;
1829         spin_unlock_bh(&mfc_unres_lock);
1830
1831         it->cache = NULL;
1832         return NULL;
1833 }
1834
1835
1836 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1837 {
1838         struct ipmr_mfc_iter *it = seq->private;
1839         struct net *net = seq_file_net(seq);
1840
1841         it->cache = NULL;
1842         it->ct = 0;
1843         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1844                 : SEQ_START_TOKEN;
1845 }
1846
1847 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1848 {
1849         struct mfc_cache *mfc = v;
1850         struct ipmr_mfc_iter *it = seq->private;
1851         struct net *net = seq_file_net(seq);
1852
1853         ++*pos;
1854
1855         if (v == SEQ_START_TOKEN)
1856                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1857
1858         if (mfc->next)
1859                 return mfc->next;
1860
1861         if (it->cache == &mfc_unres_queue)
1862                 goto end_of_list;
1863
1864         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1865
1866         while (++it->ct < MFC_LINES) {
1867                 mfc = net->ipv4.mfc_cache_array[it->ct];
1868                 if (mfc)
1869                         return mfc;
1870         }
1871
1872         /* exhausted cache_array, show unresolved */
1873         read_unlock(&mrt_lock);
1874         it->cache = &mfc_unres_queue;
1875         it->ct = 0;
1876
1877         spin_lock_bh(&mfc_unres_lock);
1878         mfc = mfc_unres_queue;
1879         while (mfc && !net_eq(mfc_net(mfc), net))
1880                 mfc = mfc->next;
1881         if (mfc)
1882                 return mfc;
1883
1884  end_of_list:
1885         spin_unlock_bh(&mfc_unres_lock);
1886         it->cache = NULL;
1887
1888         return NULL;
1889 }
1890
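/* Release whichever lock the iterator left held: mfc_unres_lock when we
 * stopped inside the unresolved queue, mrt_lock when still inside the
 * resolved cache array.
 */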
1891 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1892 {
1893         struct ipmr_mfc_iter *it = seq->private;
1894         struct net *net = seq_file_net(seq);
1895
1896         if (it->cache == &mfc_unres_queue)
1897                 spin_unlock_bh(&mfc_unres_lock);
1898         else if (it->cache == net->ipv4.mfc_cache_array)
1899                 read_unlock(&mrt_lock);
1900 }
1901
1902 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1903 {
1904         int n;
1905         struct net *net = seq_file_net(seq);
1906
1907         if (v == SEQ_START_TOKEN) {
1908                 seq_puts(seq,
1909                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1910         } else {
1911                 const struct mfc_cache *mfc = v;
1912                 const struct ipmr_mfc_iter *it = seq->private;
1913
1914                 seq_printf(seq, "%08lX %08lX %-3hd",
1915                            (unsigned long) mfc->mfc_mcastgrp,
1916                            (unsigned long) mfc->mfc_origin,
1917                            mfc->mfc_parent);
1918
1919                 if (it->cache != &mfc_unres_queue) {
1920                         seq_printf(seq, " %8lu %8lu %8lu",
1921                                    mfc->mfc_un.res.pkt,
1922                                    mfc->mfc_un.res.bytes,
1923                                    mfc->mfc_un.res.wrong_if);
1924                         for (n = mfc->mfc_un.res.minvif;
1925                              n < mfc->mfc_un.res.maxvif; n++) {
1926                                 if (VIF_EXISTS(net, n) &&
1927                                     mfc->mfc_un.res.ttls[n] < 255)
1928                                         seq_printf(seq,
1929                                            " %2d:%-3d",
1930                                            n, mfc->mfc_un.res.ttls[n]);
1931                         }
1932                 } else {
1933                         /* unresolved mfc_caches don't contain
1934                          * pkt, bytes and wrong_if values
1935                          */
1936                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1937                 }
1938                 seq_putc(seq, '\n');
1939         }
1940         return 0;
1941 }
1942
1943 static const struct seq_operations ipmr_mfc_seq_ops = {
1944         .start = ipmr_mfc_seq_start,
1945         .next  = ipmr_mfc_seq_next,
1946         .stop  = ipmr_mfc_seq_stop,
1947         .show  = ipmr_mfc_seq_show,
1948 };
1949
1950 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1951 {
1952         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1953                             sizeof(struct ipmr_mfc_iter));
1954 }
1955
1956 static const struct file_operations ipmr_mfc_fops = {
1957         .owner   = THIS_MODULE,
1958         .open    = ipmr_mfc_open,
1959         .read    = seq_read,
1960         .llseek  = seq_lseek,
1961         .release = seq_release_net,
1962 };
1963 #endif
1964
1965 #ifdef CONFIG_IP_PIMSM_V2
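/* PIM is its own IP protocol (IPPROTO_PIM, 103); registering this
 * handler lets pim_rcv() see PIMv2 register messages.
 */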
1966 static const struct net_protocol pim_protocol = {
1967         .handler        =       pim_rcv,
1968         .netns_ok       =       1,
1969 };
1970 #endif
1971
1972
1973 /*
1974  *      Setup for IP multicast routing
1975  */
1976 static int __net_init ipmr_net_init(struct net *net)
1977 {
1978         int err = 0;
1979
1980         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1981                                       GFP_KERNEL);
1982         if (!net->ipv4.vif_table) {
1983                 err = -ENOMEM;
1984                 goto fail;
1985         }
1986
1987         /* Forwarding cache */
1988         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1989                                             sizeof(struct mfc_cache *),
1990                                             GFP_KERNEL);
1991         if (!net->ipv4.mfc_cache_array) {
1992                 err = -ENOMEM;
1993                 goto fail_mfc_cache;
1994         }
1995
1996 #ifdef CONFIG_IP_PIMSM
1997         net->ipv4.mroute_reg_vif_num = -1;
1998 #endif
1999
2000 #ifdef CONFIG_PROC_FS
2001         err = -ENOMEM;
2002         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2003                 goto proc_vif_fail;
2004         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2005                 goto proc_cache_fail;
2006 #endif
2007         return 0;
2008
2009 #ifdef CONFIG_PROC_FS
2010 proc_cache_fail:
2011         proc_net_remove(net, "ip_mr_vif");
2012 proc_vif_fail:
2013         kfree(net->ipv4.mfc_cache_array);
2014 #endif
2015 fail_mfc_cache:
2016         kfree(net->ipv4.vif_table);
2017 fail:
2018         return err;
2019 }
2020
2021 static void __net_exit ipmr_net_exit(struct net *net)
2022 {
2023 #ifdef CONFIG_PROC_FS
2024         proc_net_remove(net, "ip_mr_cache");
2025         proc_net_remove(net, "ip_mr_vif");
2026 #endif
2027         kfree(net->ipv4.mfc_cache_array);
2028         kfree(net->ipv4.vif_table);
2029 }
2030
2031 static struct pernet_operations ipmr_net_ops = {
2032         .init = ipmr_net_init,
2033         .exit = ipmr_net_exit,
2034 };
2035
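/* Module initialisation: create the cache slab, register the per-net
 * subsystem, the expiry timer, the netdevice notifier and (for PIMv2)
 * the IPPROTO_PIM handler, unwinding in reverse order on failure.
 */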
2036 int __init ip_mr_init(void)
2037 {
2038         int err;
2039
2040         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2041                                        sizeof(struct mfc_cache),
2042                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2043                                        NULL);
2044         if (!mrt_cachep)
2045                 return -ENOMEM;
2046
2047         err = register_pernet_subsys(&ipmr_net_ops);
2048         if (err)
2049                 goto reg_pernet_fail;
2050
2051         setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2052         err = register_netdevice_notifier(&ip_mr_notifier);
2053         if (err)
2054                 goto reg_notif_fail;
2055 #ifdef CONFIG_IP_PIMSM_V2
2056         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2057                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2058                 err = -EAGAIN;
2059                 goto add_proto_fail;
2060         }
2061 #endif
2062         return 0;
2063
2064 #ifdef CONFIG_IP_PIMSM_V2
2065 add_proto_fail:
2066         unregister_netdevice_notifier(&ip_mr_notifier);
2067 #endif
2068 reg_notif_fail:
2069         del_timer(&ipmr_expire_timer);
2070         unregister_pernet_subsys(&ipmr_net_ops);
2071 reg_pernet_fail:
2072         kmem_cache_destroy(mrt_cachep);
2073         return err;
2074 }