Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
[linux-2.6.git] / net / ipv4 / ipmr.c
1 /*
2  *      IP multicast routing support for mrouted 3.6/3.8
3  *
4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *        Linux Consultancy and Custom Driver Development
6  *
7  *      This program is free software; you can redistribute it and/or
8  *      modify it under the terms of the GNU General Public License
9  *      as published by the Free Software Foundation; either version
10  *      2 of the License, or (at your option) any later version.
11  *
12  *      Fixes:
13  *      Michael Chastain        :       Incorrect size of copying.
14  *      Alan Cox                :       Added the cache manager code
15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
16  *      Mike McLagan            :       Routing by source
17  *      Malcolm Beattie         :       Buffer handling fixes.
18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
19  *      SVR Anand               :       Fixed several multicast bugs and problems.
20  *      Alexey Kuznetsov        :       Status, optimisations and more.
21  *      Brad Parker             :       Better behaviour on mrouted upcall
22  *                                      overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
25  *                                      Relax this requirement to work with older peers.
26  *
27  */
28
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
65
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM 1
68 #endif
69
70 /* Big lock, protecting vif table, mrt cache and mroute socket state.
71    Note that the changes are semaphored via rtnl_lock.
72  */
73
74 static DEFINE_RWLOCK(mrt_lock);
75
76 /*
77  *      Multicast router control variables
78  */
79
80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
81
82 static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
83
84 /* Special spinlock for queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock);
86
87 /* We return to original Alan's scheme. Hash table of resolved
88    entries is changed only in process context and protected
89    with weak lock mrt_lock. Queue of unresolved entries is protected
90    with strong spinlock mfc_unres_lock.
91
92    In this case data path is free of exclusive locks at all.
93  */
94
95 static struct kmem_cache *mrt_cachep __read_mostly;
96
97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98 static int ipmr_cache_report(struct net *net,
99                              struct sk_buff *pkt, vifi_t vifi, int assert);
100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101
102 static struct timer_list ipmr_expire_timer;
103
104 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
105
106 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
107 {
108         struct net *net = dev_net(dev);
109
110         dev_close(dev);
111
112         dev = __dev_get_by_name(net, "tunl0");
113         if (dev) {
114                 const struct net_device_ops *ops = dev->netdev_ops;
115                 struct ifreq ifr;
116                 struct ip_tunnel_parm p;
117
118                 memset(&p, 0, sizeof(p));
119                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
120                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
121                 p.iph.version = 4;
122                 p.iph.ihl = 5;
123                 p.iph.protocol = IPPROTO_IPIP;
124                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
125                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
126
127                 if (ops->ndo_do_ioctl) {
128                         mm_segment_t oldfs = get_fs();
129
130                         set_fs(KERNEL_DS);
131                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
132                         set_fs(oldfs);
133                 }
134         }
135 }
136
137 static
138 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
139 {
140         struct net_device  *dev;
141
142         dev = __dev_get_by_name(net, "tunl0");
143
144         if (dev) {
145                 const struct net_device_ops *ops = dev->netdev_ops;
146                 int err;
147                 struct ifreq ifr;
148                 struct ip_tunnel_parm p;
149                 struct in_device  *in_dev;
150
151                 memset(&p, 0, sizeof(p));
152                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
153                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
154                 p.iph.version = 4;
155                 p.iph.ihl = 5;
156                 p.iph.protocol = IPPROTO_IPIP;
157                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
158                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
159
160                 if (ops->ndo_do_ioctl) {
161                         mm_segment_t oldfs = get_fs();
162
163                         set_fs(KERNEL_DS);
164                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
165                         set_fs(oldfs);
166                 } else
167                         err = -EOPNOTSUPP;
168
169                 dev = NULL;
170
171                 if (err == 0 &&
172                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
173                         dev->flags |= IFF_MULTICAST;
174
175                         in_dev = __in_dev_get_rtnl(dev);
176                         if (in_dev == NULL)
177                                 goto failure;
178
179                         ipv4_devconf_setall(in_dev);
180                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
181
182                         if (dev_open(dev))
183                                 goto failure;
184                         dev_hold(dev);
185                 }
186         }
187         return dev;
188
189 failure:
190         /* allow the register to be completed before unregistering. */
191         rtnl_unlock();
192         rtnl_lock();
193
194         unregister_netdevice(dev);
195         return NULL;
196 }
197
198 #ifdef CONFIG_IP_PIMSM
199
200 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
201 {
202         struct net *net = dev_net(dev);
203
204         read_lock(&mrt_lock);
205         dev->stats.tx_bytes += skb->len;
206         dev->stats.tx_packets++;
207         ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
208                           IGMPMSG_WHOLEPKT);
209         read_unlock(&mrt_lock);
210         kfree_skb(skb);
211         return NETDEV_TX_OK;
212 }
213
214 static const struct net_device_ops reg_vif_netdev_ops = {
215         .ndo_start_xmit = reg_vif_xmit,
216 };
217
218 static void reg_vif_setup(struct net_device *dev)
219 {
220         dev->type               = ARPHRD_PIMREG;
221         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
222         dev->flags              = IFF_NOARP;
223         dev->netdev_ops         = &reg_vif_netdev_ops,
224         dev->destructor         = free_netdev;
225         dev->features           |= NETIF_F_NETNS_LOCAL;
226 }
227
228 static struct net_device *ipmr_reg_vif(struct net *net)
229 {
230         struct net_device *dev;
231         struct in_device *in_dev;
232
233         dev = alloc_netdev(0, "pimreg", reg_vif_setup);
234
235         if (dev == NULL)
236                 return NULL;
237
238         dev_net_set(dev, net);
239
240         if (register_netdevice(dev)) {
241                 free_netdev(dev);
242                 return NULL;
243         }
244         dev->iflink = 0;
245
246         rcu_read_lock();
247         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
248                 rcu_read_unlock();
249                 goto failure;
250         }
251
252         ipv4_devconf_setall(in_dev);
253         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
254         rcu_read_unlock();
255
256         if (dev_open(dev))
257                 goto failure;
258
259         dev_hold(dev);
260
261         return dev;
262
263 failure:
264         /* allow the register to be completed before unregistering. */
265         rtnl_unlock();
266         rtnl_lock();
267
268         unregister_netdevice(dev);
269         return NULL;
270 }
271 #endif
272
273 /*
274  *      Delete a VIF entry
275  *      @notify: Set to 1, if the caller is a notifier_call
276  */
277
278 static int vif_delete(struct net *net, int vifi, int notify,
279                       struct list_head *head)
280 {
281         struct vif_device *v;
282         struct net_device *dev;
283         struct in_device *in_dev;
284
285         if (vifi < 0 || vifi >= net->ipv4.maxvif)
286                 return -EADDRNOTAVAIL;
287
288         v = &net->ipv4.vif_table[vifi];
289
290         write_lock_bh(&mrt_lock);
291         dev = v->dev;
292         v->dev = NULL;
293
294         if (!dev) {
295                 write_unlock_bh(&mrt_lock);
296                 return -EADDRNOTAVAIL;
297         }
298
299 #ifdef CONFIG_IP_PIMSM
300         if (vifi == net->ipv4.mroute_reg_vif_num)
301                 net->ipv4.mroute_reg_vif_num = -1;
302 #endif
303
304         if (vifi+1 == net->ipv4.maxvif) {
305                 int tmp;
306                 for (tmp=vifi-1; tmp>=0; tmp--) {
307                         if (VIF_EXISTS(net, tmp))
308                                 break;
309                 }
310                 net->ipv4.maxvif = tmp+1;
311         }
312
313         write_unlock_bh(&mrt_lock);
314
315         dev_set_allmulti(dev, -1);
316
317         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
318                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
319                 ip_rt_multicast_event(in_dev);
320         }
321
322         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
323                 unregister_netdevice_queue(dev, head);
324
325         dev_put(dev);
326         return 0;
327 }
328
329 static inline void ipmr_cache_free(struct mfc_cache *c)
330 {
331         release_net(mfc_net(c));
332         kmem_cache_free(mrt_cachep, c);
333 }
334
335 /* Destroy an unresolved cache entry, killing queued skbs
336    and reporting error to netlink readers.
337  */
338
339 static void ipmr_destroy_unres(struct mfc_cache *c)
340 {
341         struct sk_buff *skb;
342         struct nlmsgerr *e;
343         struct net *net = mfc_net(c);
344
345         atomic_dec(&net->ipv4.cache_resolve_queue_len);
346
347         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
348                 if (ip_hdr(skb)->version == 0) {
349                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
350                         nlh->nlmsg_type = NLMSG_ERROR;
351                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
352                         skb_trim(skb, nlh->nlmsg_len);
353                         e = NLMSG_DATA(nlh);
354                         e->error = -ETIMEDOUT;
355                         memset(&e->msg, 0, sizeof(e->msg));
356
357                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
358                 } else
359                         kfree_skb(skb);
360         }
361
362         ipmr_cache_free(c);
363 }
364
365
366 /* Single timer process for all the unresolved queue. */
367
368 static void ipmr_expire_process(unsigned long dummy)
369 {
370         unsigned long now;
371         unsigned long expires;
372         struct mfc_cache *c, **cp;
373
374         if (!spin_trylock(&mfc_unres_lock)) {
375                 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
376                 return;
377         }
378
379         if (mfc_unres_queue == NULL)
380                 goto out;
381
382         now = jiffies;
383         expires = 10*HZ;
384         cp = &mfc_unres_queue;
385
386         while ((c=*cp) != NULL) {
387                 if (time_after(c->mfc_un.unres.expires, now)) {
388                         unsigned long interval = c->mfc_un.unres.expires - now;
389                         if (interval < expires)
390                                 expires = interval;
391                         cp = &c->next;
392                         continue;
393                 }
394
395                 *cp = c->next;
396
397                 ipmr_destroy_unres(c);
398         }
399
400         if (mfc_unres_queue != NULL)
401                 mod_timer(&ipmr_expire_timer, jiffies + expires);
402
403 out:
404         spin_unlock(&mfc_unres_lock);
405 }
406
407 /* Fill oifs list. It is called under write locked mrt_lock. */
408
409 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
410 {
411         int vifi;
412         struct net *net = mfc_net(cache);
413
414         cache->mfc_un.res.minvif = MAXVIFS;
415         cache->mfc_un.res.maxvif = 0;
416         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
417
418         for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
419                 if (VIF_EXISTS(net, vifi) &&
420                     ttls[vifi] && ttls[vifi] < 255) {
421                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
422                         if (cache->mfc_un.res.minvif > vifi)
423                                 cache->mfc_un.res.minvif = vifi;
424                         if (cache->mfc_un.res.maxvif <= vifi)
425                                 cache->mfc_un.res.maxvif = vifi + 1;
426                 }
427         }
428 }
429
430 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
431 {
432         int vifi = vifc->vifc_vifi;
433         struct vif_device *v = &net->ipv4.vif_table[vifi];
434         struct net_device *dev;
435         struct in_device *in_dev;
436         int err;
437
438         /* Is vif busy ? */
439         if (VIF_EXISTS(net, vifi))
440                 return -EADDRINUSE;
441
442         switch (vifc->vifc_flags) {
443 #ifdef CONFIG_IP_PIMSM
444         case VIFF_REGISTER:
445                 /*
446                  * Special Purpose VIF in PIM
447                  * All the packets will be sent to the daemon
448                  */
449                 if (net->ipv4.mroute_reg_vif_num >= 0)
450                         return -EADDRINUSE;
451                 dev = ipmr_reg_vif(net);
452                 if (!dev)
453                         return -ENOBUFS;
454                 err = dev_set_allmulti(dev, 1);
455                 if (err) {
456                         unregister_netdevice(dev);
457                         dev_put(dev);
458                         return err;
459                 }
460                 break;
461 #endif
462         case VIFF_TUNNEL:
463                 dev = ipmr_new_tunnel(net, vifc);
464                 if (!dev)
465                         return -ENOBUFS;
466                 err = dev_set_allmulti(dev, 1);
467                 if (err) {
468                         ipmr_del_tunnel(dev, vifc);
469                         dev_put(dev);
470                         return err;
471                 }
472                 break;
473
474         case VIFF_USE_IFINDEX:
475         case 0:
476                 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
477                         dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
478                         if (dev && dev->ip_ptr == NULL) {
479                                 dev_put(dev);
480                                 return -EADDRNOTAVAIL;
481                         }
482                 } else
483                         dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
484
485                 if (!dev)
486                         return -EADDRNOTAVAIL;
487                 err = dev_set_allmulti(dev, 1);
488                 if (err) {
489                         dev_put(dev);
490                         return err;
491                 }
492                 break;
493         default:
494                 return -EINVAL;
495         }
496
497         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
498                 dev_put(dev);
499                 return -EADDRNOTAVAIL;
500         }
501         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
502         ip_rt_multicast_event(in_dev);
503
504         /*
505          *      Fill in the VIF structures
506          */
507         v->rate_limit = vifc->vifc_rate_limit;
508         v->local = vifc->vifc_lcl_addr.s_addr;
509         v->remote = vifc->vifc_rmt_addr.s_addr;
510         v->flags = vifc->vifc_flags;
511         if (!mrtsock)
512                 v->flags |= VIFF_STATIC;
513         v->threshold = vifc->vifc_threshold;
514         v->bytes_in = 0;
515         v->bytes_out = 0;
516         v->pkt_in = 0;
517         v->pkt_out = 0;
518         v->link = dev->ifindex;
519         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
520                 v->link = dev->iflink;
521
522         /* And finish update writing critical data */
523         write_lock_bh(&mrt_lock);
524         v->dev = dev;
525 #ifdef CONFIG_IP_PIMSM
526         if (v->flags&VIFF_REGISTER)
527                 net->ipv4.mroute_reg_vif_num = vifi;
528 #endif
529         if (vifi+1 > net->ipv4.maxvif)
530                 net->ipv4.maxvif = vifi+1;
531         write_unlock_bh(&mrt_lock);
532         return 0;
533 }
534
535 static struct mfc_cache *ipmr_cache_find(struct net *net,
536                                          __be32 origin,
537                                          __be32 mcastgrp)
538 {
539         int line = MFC_HASH(mcastgrp, origin);
540         struct mfc_cache *c;
541
542         for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
543                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
544                         break;
545         }
546         return c;
547 }
548
549 /*
550  *      Allocate a multicast cache entry
551  */
552 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
553 {
554         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
555         if (c == NULL)
556                 return NULL;
557         c->mfc_un.res.minvif = MAXVIFS;
558         mfc_net_set(c, net);
559         return c;
560 }
561
562 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
563 {
564         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
565         if (c == NULL)
566                 return NULL;
567         skb_queue_head_init(&c->mfc_un.unres.unresolved);
568         c->mfc_un.unres.expires = jiffies + 10*HZ;
569         mfc_net_set(c, net);
570         return c;
571 }
572
573 /*
574  *      A cache entry has gone into a resolved state from queued
575  */
576
577 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
578 {
579         struct sk_buff *skb;
580         struct nlmsgerr *e;
581
582         /*
583          *      Play the pending entries through our router
584          */
585
586         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
587                 if (ip_hdr(skb)->version == 0) {
588                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
589
590                         if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
591                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
592                                                   (u8 *)nlh);
593                         } else {
594                                 nlh->nlmsg_type = NLMSG_ERROR;
595                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
596                                 skb_trim(skb, nlh->nlmsg_len);
597                                 e = NLMSG_DATA(nlh);
598                                 e->error = -EMSGSIZE;
599                                 memset(&e->msg, 0, sizeof(e->msg));
600                         }
601
602                         rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
603                 } else
604                         ip_mr_forward(skb, c, 0);
605         }
606 }
607
608 /*
609  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
610  *      expects the following bizarre scheme.
611  *
612  *      Called under mrt_lock.
613  */
614
615 static int ipmr_cache_report(struct net *net,
616                              struct sk_buff *pkt, vifi_t vifi, int assert)
617 {
618         struct sk_buff *skb;
619         const int ihl = ip_hdrlen(pkt);
620         struct igmphdr *igmp;
621         struct igmpmsg *msg;
622         int ret;
623
624 #ifdef CONFIG_IP_PIMSM
625         if (assert == IGMPMSG_WHOLEPKT)
626                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
627         else
628 #endif
629                 skb = alloc_skb(128, GFP_ATOMIC);
630
631         if (!skb)
632                 return -ENOBUFS;
633
634 #ifdef CONFIG_IP_PIMSM
635         if (assert == IGMPMSG_WHOLEPKT) {
636                 /* Ugly, but we have no choice with this interface.
637                    Duplicate old header, fix ihl, length etc.
638                    And all this only to mangle msg->im_msgtype and
639                    to set msg->im_mbz to "mbz" :-)
640                  */
641                 skb_push(skb, sizeof(struct iphdr));
642                 skb_reset_network_header(skb);
643                 skb_reset_transport_header(skb);
644                 msg = (struct igmpmsg *)skb_network_header(skb);
645                 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
646                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
647                 msg->im_mbz = 0;
648                 msg->im_vif = net->ipv4.mroute_reg_vif_num;
649                 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
650                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
651                                              sizeof(struct iphdr));
652         } else
653 #endif
654         {
655
656         /*
657          *      Copy the IP header
658          */
659
660         skb->network_header = skb->tail;
661         skb_put(skb, ihl);
662         skb_copy_to_linear_data(skb, pkt->data, ihl);
663         ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
664         msg = (struct igmpmsg *)skb_network_header(skb);
665         msg->im_vif = vifi;
666         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
667
668         /*
669          *      Add our header
670          */
671
672         igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
673         igmp->type      =
674         msg->im_msgtype = assert;
675         igmp->code      =       0;
676         ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
677         skb->transport_header = skb->network_header;
678         }
679
680         if (net->ipv4.mroute_sk == NULL) {
681                 kfree_skb(skb);
682                 return -EINVAL;
683         }
684
685         /*
686          *      Deliver to mrouted
687          */
688         ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
689         if (ret < 0) {
690                 if (net_ratelimit())
691                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
692                 kfree_skb(skb);
693         }
694
695         return ret;
696 }
697
698 /*
699  *      Queue a packet for resolution. It gets locked cache entry!
700  */
701
702 static int
703 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
704 {
705         int err;
706         struct mfc_cache *c;
707         const struct iphdr *iph = ip_hdr(skb);
708
709         spin_lock_bh(&mfc_unres_lock);
710         for (c=mfc_unres_queue; c; c=c->next) {
711                 if (net_eq(mfc_net(c), net) &&
712                     c->mfc_mcastgrp == iph->daddr &&
713                     c->mfc_origin == iph->saddr)
714                         break;
715         }
716
717         if (c == NULL) {
718                 /*
719                  *      Create a new entry if allowable
720                  */
721
722                 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
723                     (c = ipmr_cache_alloc_unres(net)) == NULL) {
724                         spin_unlock_bh(&mfc_unres_lock);
725
726                         kfree_skb(skb);
727                         return -ENOBUFS;
728                 }
729
730                 /*
731                  *      Fill in the new cache entry
732                  */
733                 c->mfc_parent   = -1;
734                 c->mfc_origin   = iph->saddr;
735                 c->mfc_mcastgrp = iph->daddr;
736
737                 /*
738                  *      Reflect first query at mrouted.
739                  */
740                 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
741                 if (err < 0) {
742                         /* If the report failed throw the cache entry
743                            out - Brad Parker
744                          */
745                         spin_unlock_bh(&mfc_unres_lock);
746
747                         ipmr_cache_free(c);
748                         kfree_skb(skb);
749                         return err;
750                 }
751
752                 atomic_inc(&net->ipv4.cache_resolve_queue_len);
753                 c->next = mfc_unres_queue;
754                 mfc_unres_queue = c;
755
756                 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
757         }
758
759         /*
760          *      See if we can append the packet
761          */
762         if (c->mfc_un.unres.unresolved.qlen>3) {
763                 kfree_skb(skb);
764                 err = -ENOBUFS;
765         } else {
766                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
767                 err = 0;
768         }
769
770         spin_unlock_bh(&mfc_unres_lock);
771         return err;
772 }
773
774 /*
775  *      MFC cache manipulation by user space mroute daemon
776  */
777
778 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
779 {
780         int line;
781         struct mfc_cache *c, **cp;
782
783         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
784
785         for (cp = &net->ipv4.mfc_cache_array[line];
786              (c = *cp) != NULL; cp = &c->next) {
787                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
788                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
789                         write_lock_bh(&mrt_lock);
790                         *cp = c->next;
791                         write_unlock_bh(&mrt_lock);
792
793                         ipmr_cache_free(c);
794                         return 0;
795                 }
796         }
797         return -ENOENT;
798 }
799
800 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
801 {
802         int line;
803         struct mfc_cache *uc, *c, **cp;
804
805         if (mfc->mfcc_parent >= MAXVIFS)
806                 return -ENFILE;
807
808         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
809
810         for (cp = &net->ipv4.mfc_cache_array[line];
811              (c = *cp) != NULL; cp = &c->next) {
812                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
813                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
814                         break;
815         }
816
817         if (c != NULL) {
818                 write_lock_bh(&mrt_lock);
819                 c->mfc_parent = mfc->mfcc_parent;
820                 ipmr_update_thresholds(c, mfc->mfcc_ttls);
821                 if (!mrtsock)
822                         c->mfc_flags |= MFC_STATIC;
823                 write_unlock_bh(&mrt_lock);
824                 return 0;
825         }
826
827         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
828                 return -EINVAL;
829
830         c = ipmr_cache_alloc(net);
831         if (c == NULL)
832                 return -ENOMEM;
833
834         c->mfc_origin = mfc->mfcc_origin.s_addr;
835         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
836         c->mfc_parent = mfc->mfcc_parent;
837         ipmr_update_thresholds(c, mfc->mfcc_ttls);
838         if (!mrtsock)
839                 c->mfc_flags |= MFC_STATIC;
840
841         write_lock_bh(&mrt_lock);
842         c->next = net->ipv4.mfc_cache_array[line];
843         net->ipv4.mfc_cache_array[line] = c;
844         write_unlock_bh(&mrt_lock);
845
846         /*
847          *      Check to see if we resolved a queued list. If so we
848          *      need to send on the frames and tidy up.
849          */
850         spin_lock_bh(&mfc_unres_lock);
851         for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
852              cp = &uc->next) {
853                 if (net_eq(mfc_net(uc), net) &&
854                     uc->mfc_origin == c->mfc_origin &&
855                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
856                         *cp = uc->next;
857                         atomic_dec(&net->ipv4.cache_resolve_queue_len);
858                         break;
859                 }
860         }
861         if (mfc_unres_queue == NULL)
862                 del_timer(&ipmr_expire_timer);
863         spin_unlock_bh(&mfc_unres_lock);
864
865         if (uc) {
866                 ipmr_cache_resolve(uc, c);
867                 ipmr_cache_free(uc);
868         }
869         return 0;
870 }
871
872 /*
873  *      Close the multicast socket, and clear the vif tables etc
874  */
875
876 static void mroute_clean_tables(struct net *net)
877 {
878         int i;
879         LIST_HEAD(list);
880
881         /*
882          *      Shut down all active vif entries
883          */
884         for (i = 0; i < net->ipv4.maxvif; i++) {
885                 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
886                         vif_delete(net, i, 0, &list);
887         }
888         unregister_netdevice_many(&list);
889
890         /*
891          *      Wipe the cache
892          */
893         for (i=0; i<MFC_LINES; i++) {
894                 struct mfc_cache *c, **cp;
895
896                 cp = &net->ipv4.mfc_cache_array[i];
897                 while ((c = *cp) != NULL) {
898                         if (c->mfc_flags&MFC_STATIC) {
899                                 cp = &c->next;
900                                 continue;
901                         }
902                         write_lock_bh(&mrt_lock);
903                         *cp = c->next;
904                         write_unlock_bh(&mrt_lock);
905
906                         ipmr_cache_free(c);
907                 }
908         }
909
910         if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
911                 struct mfc_cache *c, **cp;
912
913                 spin_lock_bh(&mfc_unres_lock);
914                 cp = &mfc_unres_queue;
915                 while ((c = *cp) != NULL) {
916                         if (!net_eq(mfc_net(c), net)) {
917                                 cp = &c->next;
918                                 continue;
919                         }
920                         *cp = c->next;
921
922                         ipmr_destroy_unres(c);
923                 }
924                 spin_unlock_bh(&mfc_unres_lock);
925         }
926 }
927
928 static void mrtsock_destruct(struct sock *sk)
929 {
930         struct net *net = sock_net(sk);
931
932         rtnl_lock();
933         if (sk == net->ipv4.mroute_sk) {
934                 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
935
936                 write_lock_bh(&mrt_lock);
937                 net->ipv4.mroute_sk = NULL;
938                 write_unlock_bh(&mrt_lock);
939
940                 mroute_clean_tables(net);
941         }
942         rtnl_unlock();
943 }
944
945 /*
946  *      Socket options and virtual interface manipulation. The whole
947  *      virtual interface system is a complete heap, but unfortunately
948  *      that's how BSD mrouted happens to think. Maybe one day with a proper
949  *      MOSPF/PIM router set up we can clean this up.
950  */
951
952 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
953 {
954         int ret;
955         struct vifctl vif;
956         struct mfcctl mfc;
957         struct net *net = sock_net(sk);
958
959         if (optname != MRT_INIT) {
960                 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
961                         return -EACCES;
962         }
963
964         switch (optname) {
965         case MRT_INIT:
966                 if (sk->sk_type != SOCK_RAW ||
967                     inet_sk(sk)->inet_num != IPPROTO_IGMP)
968                         return -EOPNOTSUPP;
969                 if (optlen != sizeof(int))
970                         return -ENOPROTOOPT;
971
972                 rtnl_lock();
973                 if (net->ipv4.mroute_sk) {
974                         rtnl_unlock();
975                         return -EADDRINUSE;
976                 }
977
978                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
979                 if (ret == 0) {
980                         write_lock_bh(&mrt_lock);
981                         net->ipv4.mroute_sk = sk;
982                         write_unlock_bh(&mrt_lock);
983
984                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
985                 }
986                 rtnl_unlock();
987                 return ret;
988         case MRT_DONE:
989                 if (sk != net->ipv4.mroute_sk)
990                         return -EACCES;
991                 return ip_ra_control(sk, 0, NULL);
992         case MRT_ADD_VIF:
993         case MRT_DEL_VIF:
994                 if (optlen != sizeof(vif))
995                         return -EINVAL;
996                 if (copy_from_user(&vif, optval, sizeof(vif)))
997                         return -EFAULT;
998                 if (vif.vifc_vifi >= MAXVIFS)
999                         return -ENFILE;
1000                 rtnl_lock();
1001                 if (optname == MRT_ADD_VIF) {
1002                         ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
1003                 } else {
1004                         ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
1005                 }
1006                 rtnl_unlock();
1007                 return ret;
1008
1009                 /*
1010                  *      Manipulate the forwarding caches. These live
1011                  *      in a sort of kernel/user symbiosis.
1012                  */
1013         case MRT_ADD_MFC:
1014         case MRT_DEL_MFC:
1015                 if (optlen != sizeof(mfc))
1016                         return -EINVAL;
1017                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1018                         return -EFAULT;
1019                 rtnl_lock();
1020                 if (optname == MRT_DEL_MFC)
1021                         ret = ipmr_mfc_delete(net, &mfc);
1022                 else
1023                         ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1024                 rtnl_unlock();
1025                 return ret;
1026                 /*
1027                  *      Control PIM assert.
1028                  */
1029         case MRT_ASSERT:
1030         {
1031                 int v;
1032                 if (get_user(v,(int __user *)optval))
1033                         return -EFAULT;
1034                 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1035                 return 0;
1036         }
1037 #ifdef CONFIG_IP_PIMSM
1038         case MRT_PIM:
1039         {
1040                 int v;
1041
1042                 if (get_user(v,(int __user *)optval))
1043                         return -EFAULT;
1044                 v = (v) ? 1 : 0;
1045
1046                 rtnl_lock();
1047                 ret = 0;
1048                 if (v != net->ipv4.mroute_do_pim) {
1049                         net->ipv4.mroute_do_pim = v;
1050                         net->ipv4.mroute_do_assert = v;
1051                 }
1052                 rtnl_unlock();
1053                 return ret;
1054         }
1055 #endif
1056         /*
1057          *      Spurious command, or MRT_VERSION which you cannot
1058          *      set.
1059          */
1060         default:
1061                 return -ENOPROTOOPT;
1062         }
1063 }
1064
1065 /*
1066  *      Getsock opt support for the multicast routing system.
1067  */
1068
1069 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1070 {
1071         int olr;
1072         int val;
1073         struct net *net = sock_net(sk);
1074
1075         if (optname != MRT_VERSION &&
1076 #ifdef CONFIG_IP_PIMSM
1077            optname!=MRT_PIM &&
1078 #endif
1079            optname!=MRT_ASSERT)
1080                 return -ENOPROTOOPT;
1081
1082         if (get_user(olr, optlen))
1083                 return -EFAULT;
1084
1085         olr = min_t(unsigned int, olr, sizeof(int));
1086         if (olr < 0)
1087                 return -EINVAL;
1088
1089         if (put_user(olr, optlen))
1090                 return -EFAULT;
1091         if (optname == MRT_VERSION)
1092                 val = 0x0305;
1093 #ifdef CONFIG_IP_PIMSM
1094         else if (optname == MRT_PIM)
1095                 val = net->ipv4.mroute_do_pim;
1096 #endif
1097         else
1098                 val = net->ipv4.mroute_do_assert;
1099         if (copy_to_user(optval, &val, olr))
1100                 return -EFAULT;
1101         return 0;
1102 }
1103
1104 /*
1105  *      The IP multicast ioctl support routines.
1106  */
1107
1108 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1109 {
1110         struct sioc_sg_req sr;
1111         struct sioc_vif_req vr;
1112         struct vif_device *vif;
1113         struct mfc_cache *c;
1114         struct net *net = sock_net(sk);
1115
1116         switch (cmd) {
1117         case SIOCGETVIFCNT:
1118                 if (copy_from_user(&vr, arg, sizeof(vr)))
1119                         return -EFAULT;
1120                 if (vr.vifi >= net->ipv4.maxvif)
1121                         return -EINVAL;
1122                 read_lock(&mrt_lock);
1123                 vif = &net->ipv4.vif_table[vr.vifi];
1124                 if (VIF_EXISTS(net, vr.vifi)) {
1125                         vr.icount = vif->pkt_in;
1126                         vr.ocount = vif->pkt_out;
1127                         vr.ibytes = vif->bytes_in;
1128                         vr.obytes = vif->bytes_out;
1129                         read_unlock(&mrt_lock);
1130
1131                         if (copy_to_user(arg, &vr, sizeof(vr)))
1132                                 return -EFAULT;
1133                         return 0;
1134                 }
1135                 read_unlock(&mrt_lock);
1136                 return -EADDRNOTAVAIL;
1137         case SIOCGETSGCNT:
1138                 if (copy_from_user(&sr, arg, sizeof(sr)))
1139                         return -EFAULT;
1140
1141                 read_lock(&mrt_lock);
1142                 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1143                 if (c) {
1144                         sr.pktcnt = c->mfc_un.res.pkt;
1145                         sr.bytecnt = c->mfc_un.res.bytes;
1146                         sr.wrong_if = c->mfc_un.res.wrong_if;
1147                         read_unlock(&mrt_lock);
1148
1149                         if (copy_to_user(arg, &sr, sizeof(sr)))
1150                                 return -EFAULT;
1151                         return 0;
1152                 }
1153                 read_unlock(&mrt_lock);
1154                 return -EADDRNOTAVAIL;
1155         default:
1156                 return -ENOIOCTLCMD;
1157         }
1158 }
1159
1160
1161 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1162 {
1163         struct net_device *dev = ptr;
1164         struct net *net = dev_net(dev);
1165         struct vif_device *v;
1166         int ct;
1167         LIST_HEAD(list);
1168
1169         if (event != NETDEV_UNREGISTER)
1170                 return NOTIFY_DONE;
1171         v = &net->ipv4.vif_table[0];
1172         for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1173                 if (v->dev == dev)
1174                         vif_delete(net, ct, 1, &list);
1175         }
1176         unregister_netdevice_many(&list);
1177         return NOTIFY_DONE;
1178 }
1179
1180
/* Notifier block whose callback is ipmr_device_event(). */
static struct notifier_block ip_mr_notifier = {
        .notifier_call = ipmr_device_event,
};
1184
1185 /*
1186  *      Encapsulate a packet by attaching a valid IPIP header to it.
1187  *      This avoids tunnel drivers and other mess and gives us the speed so
1188  *      important for multicast video.
1189  */
1190
1191 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1192 {
1193         struct iphdr *iph;
1194         struct iphdr *old_iph = ip_hdr(skb);
1195
1196         skb_push(skb, sizeof(struct iphdr));
1197         skb->transport_header = skb->network_header;
1198         skb_reset_network_header(skb);
1199         iph = ip_hdr(skb);
1200
1201         iph->version    =       4;
1202         iph->tos        =       old_iph->tos;
1203         iph->ttl        =       old_iph->ttl;
1204         iph->frag_off   =       0;
1205         iph->daddr      =       daddr;
1206         iph->saddr      =       saddr;
1207         iph->protocol   =       IPPROTO_IPIP;
1208         iph->ihl        =       5;
1209         iph->tot_len    =       htons(skb->len);
1210         ip_select_ident(iph, skb_dst(skb), NULL);
1211         ip_send_check(iph);
1212
1213         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1214         nf_reset(skb);
1215 }
1216
1217 static inline int ipmr_forward_finish(struct sk_buff *skb)
1218 {
1219         struct ip_options * opt = &(IPCB(skb)->opt);
1220
1221         IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1222
1223         if (unlikely(opt->optlen))
1224                 ip_forward_options(skb);
1225
1226         return dst_output(skb);
1227 }
1228
/*
 *      Processing handlers for ipmr_forward
 *
 *      ipmr_queue_xmit() sends one skb out through vif number vifi of the
 *      namespace that owns cache entry c. The skb is always consumed:
 *      it is either passed to the NF_INET_FORWARD hook (continuing in
 *      ipmr_forward_finish) or freed at out_free.
 *
 *      - PIM register vif: the packet is counted and reported with
 *        ipmr_cache_report(IGMPMSG_WHOLEPKT), then freed.
 *      - Tunnel vif: routed towards vif->remote and wrapped by ip_encap().
 *      - Any other vif: routed to the packet's own destination via
 *        vif->link.
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
        struct net *net = mfc_net(c);
        const struct iphdr *iph = ip_hdr(skb);
        struct vif_device *vif = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;

        /* vif slot has no device attached: nothing to send on */
        if (vif->dev == NULL)
                goto out_free;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out += skb->len;
                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;
                ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
                goto out_free;
        }
#endif

        if (vif->flags&VIFF_TUNNEL) {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = vif->remote,
                                                .saddr = vif->local,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
                /* room for the outer header added by ip_encap() below */
                encap = sizeof(struct iphdr);
        } else {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
        }

        dev = rt->u.dst.dev;

        if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                   allow to send ICMP, so that packets will disappear
                   to blackhole.
                 */

                IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
                ip_rt_put(rt);
                goto out_free;
        }

        /* total headroom needed: outer header (if any) + link layer + dst header */
        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

        if (skb_cow(skb, encap)) {
                ip_rt_put(rt);
                goto out_free;
        }

        vif->pkt_out++;
        vif->bytes_out += skb->len;

        /* replace the skb's dst with the route looked up above */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->u.dst);
        /* header is re-read via ip_hdr() here rather than using iph, which was taken before skb_cow() */
        ip_decrease_ttl(ip_hdr(skb));

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                vif->dev->stats.tx_packets++;
                vif->dev->stats.tx_bytes += skb->len;
        }

        /* ip_mr_input() checks this flag and does not forward the packet again */
        IPCB(skb)->flags |= IPSKB_FORWARDED;

        /*
         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
         * not only before forwarding, but after forwarding on all output
         * interfaces. It is clear, if mrouter runs a multicasting
         * program, it should receive packets not depending to what interface
         * program is joined.
         * If we will not make it, the program will have to join on all
         * interfaces. On the other hand, multihoming host (or router, but
         * not mrouter) cannot join to more than one interface - it will
         * result in receiving multiple packets.
         */
        NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
                ipmr_forward_finish);
        return;

out_free:
        kfree_skb(skb);
        return;
}
1333
1334 static int ipmr_find_vif(struct net_device *dev)
1335 {
1336         struct net *net = dev_net(dev);
1337         int ct;
1338         for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1339                 if (net->ipv4.vif_table[ct].dev == dev)
1340                         break;
1341         }
1342         return ct;
1343 }
1344
/*
 *      Forward skb according to resolved cache entry "cache".
 *
 *      "local" means that we should preserve one skb (for local delivery):
 *      when it is non-zero only clones are transmitted and skb itself is
 *      left untouched for the caller; when it is zero skb is consumed here
 *      (sent on the last matching vif, or freed).
 *
 *      Always returns 0.
 */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;
        struct net *net = mfc_net(cache);

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (net->ipv4.vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (skb_rtable(skb)->fl.iif == 0) {
                        /* It is our own packet, looped back.
                           Very complicated situation...

                           The best workaround until routing daemons will be
                           fixed is not to redistribute packet, if it was
                           send through wrong interface. It means, that
                           multicast applications WILL NOT work for
                           (S,G), which have default multicast route pointing
                           to wrong oif. In any case, it is not a good
                           idea to use multicasting applications on router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                /* report IGMPMSG_WRONGVIF only after MFC_ASSERT_THRESH has passed since the last report */
                if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
                       so that we cannot check that packet arrived on an oif.
                       It is bad, but otherwise we would need to move pretty
                       large chunk of pimd to kernel. Ough... --ANK
                     */
                    (net->ipv4.mroute_do_pim ||
                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        net->ipv4.vif_table[vif].pkt_in++;
        net->ipv4.vif_table[vif].bytes_in += skb->len;

        /*
         *      Forward the frame
         *
         *      Walk the output vifs from maxvif-1 down to minvif. A vif is
         *      selected when the packet TTL exceeds its threshold. Sending
         *      is deferred by one step (psend holds the pending vif) so the
         *      last selected vif can take the original skb instead of a
         *      clone.
         */
        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        ipmr_queue_xmit(skb2, cache, psend);
                        }
                        psend = ct;
                }
        }
        if (psend != -1) {
                if (local) {
                        /* caller still needs skb: send a clone */
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (skb2)
                                ipmr_queue_xmit(skb2, cache, psend);
                } else {
                        /* ipmr_queue_xmit() consumes skb */
                        ipmr_queue_xmit(skb, cache, psend);
                        return 0;
                }
        }

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}
1429
1430
/*
 *      Multicast packets for forwarding arrive here
 *
 *      Looks up the (source, group) cache entry under mrt_lock. With a
 *      resolved entry the packet is forwarded through ip_mr_forward();
 *      without one it is queued through ipmr_cache_unresolved(). If the
 *      route is flagged RTCF_LOCAL the packet is also delivered locally.
 *
 *      Returns 0, the result of ip_local_deliver() or
 *      ipmr_cache_unresolved(), -ENOBUFS if a clone for the unresolved
 *      queue could not be allocated, or -ENODEV if the input device has
 *      no vif.
 */

int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        struct net *net = dev_net(skb->dev);
        int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

        /* Packet is looped back after forward, it should not be
           forwarded second time, but still can be delivered locally.
         */
        if (IPCB(skb)->flags&IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                    if (IPCB(skb)->opt.router_alert) {
                            /* non-zero return: skb needs no further handling here */
                            if (ip_call_ra_chain(skb))
                                    return 0;
                    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
                            /* IGMPv1 (and broken IGMPv2 implementations sort of
                               Cisco IOS <= 11.2(8)) do not put router alert
                               option to IGMP packets destined to routable
                               groups. It is very bad, because it means
                               that we can forward NO IGMP messages.
                             */
                            read_lock(&mrt_lock);
                            if (net->ipv4.mroute_sk) {
                                    nf_reset(skb);
                                    raw_rcv(net->ipv4.mroute_sk, skb);
                                    read_unlock(&mrt_lock);
                                    return 0;
                            }
                            read_unlock(&mrt_lock);
                    }
        }

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

        /*
         *      No usable cache entry
         */
        if (cache == NULL) {
                int vif;

                if (local) {
                        /* deliver the original locally; only a clone goes to the unresolved queue */
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        int err = ipmr_cache_unresolved(net, vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        /* with local set, ip_mr_forward() leaves skb alone for the delivery below */
        ip_mr_forward(skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}
1515
1516 #ifdef CONFIG_IP_PIMSM
1517 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1518 {
1519         struct net_device *reg_dev = NULL;
1520         struct iphdr *encap;
1521         struct net *net = dev_net(skb->dev);
1522
1523         encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1524         /*
1525            Check that:
1526            a. packet is really destinted to a multicast group
1527            b. packet is not a NULL-REGISTER
1528            c. packet is not truncated
1529          */
1530         if (!ipv4_is_multicast(encap->daddr) ||
1531             encap->tot_len == 0 ||
1532             ntohs(encap->tot_len) + pimlen > skb->len)
1533                 return 1;
1534
1535         read_lock(&mrt_lock);
1536         if (net->ipv4.mroute_reg_vif_num >= 0)
1537                 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1538         if (reg_dev)
1539                 dev_hold(reg_dev);
1540         read_unlock(&mrt_lock);
1541
1542         if (reg_dev == NULL)
1543                 return 1;
1544
1545         skb->mac_header = skb->network_header;
1546         skb_pull(skb, (u8*)encap - skb->data);
1547         skb_reset_network_header(skb);
1548         skb->dev = reg_dev;
1549         skb->protocol = htons(ETH_P_IP);
1550         skb->ip_summed = 0;
1551         skb->pkt_type = PACKET_HOST;
1552         skb_dst_drop(skb);
1553         reg_dev->stats.rx_bytes += skb->len;
1554         reg_dev->stats.rx_packets++;
1555         nf_reset(skb);
1556         netif_rx(skb);
1557         dev_put(reg_dev);
1558
1559         return 0;
1560 }
1561 #endif
1562
1563 #ifdef CONFIG_IP_PIMSM_V1
1564 /*
1565  * Handle IGMP messages of PIMv1
1566  */
1567
1568 int pim_rcv_v1(struct sk_buff * skb)
1569 {
1570         struct igmphdr *pim;
1571         struct net *net = dev_net(skb->dev);
1572
1573         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1574                 goto drop;
1575
1576         pim = igmp_hdr(skb);
1577
1578         if (!net->ipv4.mroute_do_pim ||
1579             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1580                 goto drop;
1581
1582         if (__pim_rcv(skb, sizeof(*pim))) {
1583 drop:
1584                 kfree_skb(skb);
1585         }
1586         return 0;
1587 }
1588 #endif
1589
1590 #ifdef CONFIG_IP_PIMSM_V2
1591 static int pim_rcv(struct sk_buff * skb)
1592 {
1593         struct pimreghdr *pim;
1594
1595         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1596                 goto drop;
1597
1598         pim = (struct pimreghdr *)skb_transport_header(skb);
1599         if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1600             (pim->flags&PIM_NULL_REGISTER) ||
1601             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1602              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1603                 goto drop;
1604
1605         if (__pim_rcv(skb, sizeof(*pim))) {
1606 drop:
1607                 kfree_skb(skb);
1608         }
1609         return 0;
1610 }
1611 #endif
1612
1613 static int
1614 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1615 {
1616         int ct;
1617         struct rtnexthop *nhp;
1618         struct net *net = mfc_net(c);
1619         u8 *b = skb_tail_pointer(skb);
1620         struct rtattr *mp_head;
1621
1622         /* If cache is unresolved, don't try to parse IIF and OIF */
1623         if (c->mfc_parent > MAXVIFS)
1624                 return -ENOENT;
1625
1626         if (VIF_EXISTS(net, c->mfc_parent))
1627                 RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);
1628
1629         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1630
1631         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1632                 if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
1633                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1634                                 goto rtattr_failure;
1635                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1636                         nhp->rtnh_flags = 0;
1637                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1638                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1639                         nhp->rtnh_len = sizeof(*nhp);
1640                 }
1641         }
1642         mp_head->rta_type = RTA_MULTIPATH;
1643         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1644         rtm->rtm_type = RTN_MULTICAST;
1645         return 1;
1646
1647 rtattr_failure:
1648         nlmsg_trim(skb, b);
1649         return -EMSGSIZE;
1650 }
1651
1652 int ipmr_get_route(struct net *net,
1653                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1654 {
1655         int err;
1656         struct mfc_cache *cache;
1657         struct rtable *rt = skb_rtable(skb);
1658
1659         read_lock(&mrt_lock);
1660         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1661
1662         if (cache == NULL) {
1663                 struct sk_buff *skb2;
1664                 struct iphdr *iph;
1665                 struct net_device *dev;
1666                 int vif;
1667
1668                 if (nowait) {
1669                         read_unlock(&mrt_lock);
1670                         return -EAGAIN;
1671                 }
1672
1673                 dev = skb->dev;
1674                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1675                         read_unlock(&mrt_lock);
1676                         return -ENODEV;
1677                 }
1678                 skb2 = skb_clone(skb, GFP_ATOMIC);
1679                 if (!skb2) {
1680                         read_unlock(&mrt_lock);
1681                         return -ENOMEM;
1682                 }
1683
1684                 skb_push(skb2, sizeof(struct iphdr));
1685                 skb_reset_network_header(skb2);
1686                 iph = ip_hdr(skb2);
1687                 iph->ihl = sizeof(struct iphdr) >> 2;
1688                 iph->saddr = rt->rt_src;
1689                 iph->daddr = rt->rt_dst;
1690                 iph->version = 0;
1691                 err = ipmr_cache_unresolved(net, vif, skb2);
1692                 read_unlock(&mrt_lock);
1693                 return err;
1694         }
1695
1696         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1697                 cache->mfc_flags |= MFC_NOTIFY;
1698         err = ipmr_fill_mroute(skb, cache, rtm);
1699         read_unlock(&mrt_lock);
1700         return err;
1701 }
1702
1703 #ifdef CONFIG_PROC_FS
1704 /*
1705  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1706  */
/* Private seq_file iterator state for the vif table listing. */
struct ipmr_vif_iter {
        struct seq_net_private p;
        int ct;         /* current index into net->ipv4.vif_table */
};
1711
1712 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1713                                            struct ipmr_vif_iter *iter,
1714                                            loff_t pos)
1715 {
1716         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1717                 if (!VIF_EXISTS(net, iter->ct))
1718                         continue;
1719                 if (pos-- == 0)
1720                         return &net->ipv4.vif_table[iter->ct];
1721         }
1722         return NULL;
1723 }
1724
1725 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1726         __acquires(mrt_lock)
1727 {
1728         struct net *net = seq_file_net(seq);
1729
1730         read_lock(&mrt_lock);
1731         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1732                 : SEQ_START_TOKEN;
1733 }
1734
1735 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1736 {
1737         struct ipmr_vif_iter *iter = seq->private;
1738         struct net *net = seq_file_net(seq);
1739
1740         ++*pos;
1741         if (v == SEQ_START_TOKEN)
1742                 return ipmr_vif_seq_idx(net, iter, 0);
1743
1744         while (++iter->ct < net->ipv4.maxvif) {
1745                 if (!VIF_EXISTS(net, iter->ct))
1746                         continue;
1747                 return &net->ipv4.vif_table[iter->ct];
1748         }
1749         return NULL;
1750 }
1751
/* seq_file stop: drop the read lock taken in ipmr_vif_seq_start(). */
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
        __releases(mrt_lock)
{
        read_unlock(&mrt_lock);
}
1757
1758 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1759 {
1760         struct net *net = seq_file_net(seq);
1761
1762         if (v == SEQ_START_TOKEN) {
1763                 seq_puts(seq,
1764                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1765         } else {
1766                 const struct vif_device *vif = v;
1767                 const char *name =  vif->dev ? vif->dev->name : "none";
1768
1769                 seq_printf(seq,
1770                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1771                            vif - net->ipv4.vif_table,
1772                            name, vif->bytes_in, vif->pkt_in,
1773                            vif->bytes_out, vif->pkt_out,
1774                            vif->flags, vif->local, vif->remote);
1775         }
1776         return 0;
1777 }
1778
1779 static const struct seq_operations ipmr_vif_seq_ops = {
1780         .start = ipmr_vif_seq_start,
1781         .next  = ipmr_vif_seq_next,
1782         .stop  = ipmr_vif_seq_stop,
1783         .show  = ipmr_vif_seq_show,
1784 };
1785
1786 static int ipmr_vif_open(struct inode *inode, struct file *file)
1787 {
1788         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1789                             sizeof(struct ipmr_vif_iter));
1790 }
1791
1792 static const struct file_operations ipmr_vif_fops = {
1793         .owner   = THIS_MODULE,
1794         .open    = ipmr_vif_open,
1795         .read    = seq_read,
1796         .llseek  = seq_lseek,
1797         .release = seq_release_net,
1798 };
1799
/*
 * Cursor state kept in seq->private while walking the multicast
 * forwarding cache through seq_file.
 */
struct ipmr_mfc_iter {
	/* NOTE(review): presumably has to stay the first member so that
	 * seq_open_net()/seq_file_net() can find it - confirm before moving.
	 */
	struct seq_net_private p;
	/* List currently being walked: the namespace's mfc_cache_array,
	 * &mfc_unres_queue, or NULL.  ipmr_mfc_seq_stop() uses this value to
	 * decide which lock (if any) has to be released.
	 */
	struct mfc_cache **cache;
	/* Current bucket index into mfc_cache_array. */
	int ct;
};
1805
1806
1807 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1808                                           struct ipmr_mfc_iter *it, loff_t pos)
1809 {
1810         struct mfc_cache *mfc;
1811
1812         it->cache = net->ipv4.mfc_cache_array;
1813         read_lock(&mrt_lock);
1814         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1815                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1816                      mfc; mfc = mfc->next)
1817                         if (pos-- == 0)
1818                                 return mfc;
1819         read_unlock(&mrt_lock);
1820
1821         it->cache = &mfc_unres_queue;
1822         spin_lock_bh(&mfc_unres_lock);
1823         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1824                 if (net_eq(mfc_net(mfc), net) &&
1825                     pos-- == 0)
1826                         return mfc;
1827         spin_unlock_bh(&mfc_unres_lock);
1828
1829         it->cache = NULL;
1830         return NULL;
1831 }
1832
1833
1834 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1835 {
1836         struct ipmr_mfc_iter *it = seq->private;
1837         struct net *net = seq_file_net(seq);
1838
1839         it->cache = NULL;
1840         it->ct = 0;
1841         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1842                 : SEQ_START_TOKEN;
1843 }
1844
1845 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1846 {
1847         struct mfc_cache *mfc = v;
1848         struct ipmr_mfc_iter *it = seq->private;
1849         struct net *net = seq_file_net(seq);
1850
1851         ++*pos;
1852
1853         if (v == SEQ_START_TOKEN)
1854                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1855
1856         if (mfc->next)
1857                 return mfc->next;
1858
1859         if (it->cache == &mfc_unres_queue)
1860                 goto end_of_list;
1861
1862         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1863
1864         while (++it->ct < MFC_LINES) {
1865                 mfc = net->ipv4.mfc_cache_array[it->ct];
1866                 if (mfc)
1867                         return mfc;
1868         }
1869
1870         /* exhausted cache_array, show unresolved */
1871         read_unlock(&mrt_lock);
1872         it->cache = &mfc_unres_queue;
1873         it->ct = 0;
1874
1875         spin_lock_bh(&mfc_unres_lock);
1876         mfc = mfc_unres_queue;
1877         while (mfc && !net_eq(mfc_net(mfc), net))
1878                 mfc = mfc->next;
1879         if (mfc)
1880                 return mfc;
1881
1882  end_of_list:
1883         spin_unlock_bh(&mfc_unres_lock);
1884         it->cache = NULL;
1885
1886         return NULL;
1887 }
1888
1889 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1890 {
1891         struct ipmr_mfc_iter *it = seq->private;
1892         struct net *net = seq_file_net(seq);
1893
1894         if (it->cache == &mfc_unres_queue)
1895                 spin_unlock_bh(&mfc_unres_lock);
1896         else if (it->cache == net->ipv4.mfc_cache_array)
1897                 read_unlock(&mrt_lock);
1898 }
1899
1900 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1901 {
1902         int n;
1903         struct net *net = seq_file_net(seq);
1904
1905         if (v == SEQ_START_TOKEN) {
1906                 seq_puts(seq,
1907                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1908         } else {
1909                 const struct mfc_cache *mfc = v;
1910                 const struct ipmr_mfc_iter *it = seq->private;
1911
1912                 seq_printf(seq, "%08lX %08lX %-3hd",
1913                            (unsigned long) mfc->mfc_mcastgrp,
1914                            (unsigned long) mfc->mfc_origin,
1915                            mfc->mfc_parent);
1916
1917                 if (it->cache != &mfc_unres_queue) {
1918                         seq_printf(seq, " %8lu %8lu %8lu",
1919                                    mfc->mfc_un.res.pkt,
1920                                    mfc->mfc_un.res.bytes,
1921                                    mfc->mfc_un.res.wrong_if);
1922                         for (n = mfc->mfc_un.res.minvif;
1923                              n < mfc->mfc_un.res.maxvif; n++ ) {
1924                                 if (VIF_EXISTS(net, n) &&
1925                                     mfc->mfc_un.res.ttls[n] < 255)
1926                                         seq_printf(seq,
1927                                            " %2d:%-3d",
1928                                            n, mfc->mfc_un.res.ttls[n]);
1929                         }
1930                 } else {
1931                         /* unresolved mfc_caches don't contain
1932                          * pkt, bytes and wrong_if values
1933                          */
1934                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1935                 }
1936                 seq_putc(seq, '\n');
1937         }
1938         return 0;
1939 }
1940
1941 static const struct seq_operations ipmr_mfc_seq_ops = {
1942         .start = ipmr_mfc_seq_start,
1943         .next  = ipmr_mfc_seq_next,
1944         .stop  = ipmr_mfc_seq_stop,
1945         .show  = ipmr_mfc_seq_show,
1946 };
1947
1948 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1949 {
1950         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1951                             sizeof(struct ipmr_mfc_iter));
1952 }
1953
1954 static const struct file_operations ipmr_mfc_fops = {
1955         .owner   = THIS_MODULE,
1956         .open    = ipmr_mfc_open,
1957         .read    = seq_read,
1958         .llseek  = seq_lseek,
1959         .release = seq_release_net,
1960 };
1961 #endif
1962
#ifdef CONFIG_IP_PIMSM_V2
/*
 * Protocol descriptor that ip_mr_init() registers for IPPROTO_PIM;
 * incoming packets are handed to pim_rcv().
 */
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif
1969
1970
1971 /*
1972  *      Setup for IP multicast routing
1973  */
1974 static int __net_init ipmr_net_init(struct net *net)
1975 {
1976         int err = 0;
1977
1978         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1979                                       GFP_KERNEL);
1980         if (!net->ipv4.vif_table) {
1981                 err = -ENOMEM;
1982                 goto fail;
1983         }
1984
1985         /* Forwarding cache */
1986         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1987                                             sizeof(struct mfc_cache *),
1988                                             GFP_KERNEL);
1989         if (!net->ipv4.mfc_cache_array) {
1990                 err = -ENOMEM;
1991                 goto fail_mfc_cache;
1992         }
1993
1994 #ifdef CONFIG_IP_PIMSM
1995         net->ipv4.mroute_reg_vif_num = -1;
1996 #endif
1997
1998 #ifdef CONFIG_PROC_FS
1999         err = -ENOMEM;
2000         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2001                 goto proc_vif_fail;
2002         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2003                 goto proc_cache_fail;
2004 #endif
2005         return 0;
2006
2007 #ifdef CONFIG_PROC_FS
2008 proc_cache_fail:
2009         proc_net_remove(net, "ip_mr_vif");
2010 proc_vif_fail:
2011         kfree(net->ipv4.mfc_cache_array);
2012 #endif
2013 fail_mfc_cache:
2014         kfree(net->ipv4.vif_table);
2015 fail:
2016         return err;
2017 }
2018
/*
 * Per-namespace teardown, undoing ipmr_net_init() in reverse order: the
 * proc entries are removed first, then the forwarding cache hash and the
 * vif table are freed.
 */
static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}
2028
/*
 * Per-namespace init/exit hooks, registered by ip_mr_init() through
 * register_pernet_subsys().
 */
static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};
2033
2034 int __init ip_mr_init(void)
2035 {
2036         int err;
2037
2038         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2039                                        sizeof(struct mfc_cache),
2040                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2041                                        NULL);
2042         if (!mrt_cachep)
2043                 return -ENOMEM;
2044
2045         err = register_pernet_subsys(&ipmr_net_ops);
2046         if (err)
2047                 goto reg_pernet_fail;
2048
2049         setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2050         err = register_netdevice_notifier(&ip_mr_notifier);
2051         if (err)
2052                 goto reg_notif_fail;
2053 #ifdef CONFIG_IP_PIMSM_V2
2054         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2055                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2056                 err = -EAGAIN;
2057                 goto add_proto_fail;
2058         }
2059 #endif
2060         return 0;
2061
2062 #ifdef CONFIG_IP_PIMSM_V2
2063 add_proto_fail:
2064         unregister_netdevice_notifier(&ip_mr_notifier);
2065 #endif
2066 reg_notif_fail:
2067         del_timer(&ipmr_expire_timer);
2068         unregister_pernet_subsys(&ipmr_net_ops);
2069 reg_pernet_fail:
2070         kmem_cache_destroy(mrt_cachep);
2071         return err;
2072 }