Merge branch 'llseek' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd/bkl
[linux-2.6.git] / net / ipv4 / ipmr.c
1 /*
2  *      IP multicast routing support for mrouted 3.6/3.8
3  *
4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *        Linux Consultancy and Custom Driver Development
6  *
7  *      This program is free software; you can redistribute it and/or
8  *      modify it under the terms of the GNU General Public License
9  *      as published by the Free Software Foundation; either version
10  *      2 of the License, or (at your option) any later version.
11  *
12  *      Fixes:
13  *      Michael Chastain        :       Incorrect size of copying.
14  *      Alan Cox                :       Added the cache manager code
15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
16  *      Mike McLagan            :       Routing by source
17  *      Malcolm Beattie         :       Buffer handling fixes.
18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
19  *      SVR Anand               :       Fixed several multicast bugs and problems.
20  *      Alexey Kuznetsov        :       Status, optimisations and more.
21  *      Brad Parker             :       Better behaviour on mrouted upcall
22  *                                      overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
25  *                                      Relax this requirement to work with older peers.
26  *
27  */
28
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <linux/slab.h>
51 #include <net/net_namespace.h>
52 #include <net/ip.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
55 #include <net/route.h>
56 #include <net/sock.h>
57 #include <net/icmp.h>
58 #include <net/udp.h>
59 #include <net/raw.h>
60 #include <linux/notifier.h>
61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h>
63 #include <net/ipip.h>
64 #include <net/checksum.h>
65 #include <net/netlink.h>
66 #include <net/fib_rules.h>
67
68 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69 #define CONFIG_IP_PIMSM 1
70 #endif
71
/* Per-namespace multicast routing table.  With
 * CONFIG_IP_MROUTE_MULTIPLE_TABLES several instances are linked on
 * net->ipv4.mr_tables; otherwise a single instance hangs off
 * net->ipv4.mrt.  Protected by mrt_lock / mfc_unres_lock (see below).
 */
struct mr_table {
	struct list_head	list;	/* link in net->ipv4.mr_tables */
#ifdef CONFIG_NET_NS
	struct net		*net;	/* owning network namespace */
#endif
	u32			id;	/* table id (RT_TABLE_*) */
	struct sock		*mroute_sk;	/* daemon socket; reports are queued here */
	struct timer_list	ipmr_expire_timer;	/* expires unresolved entries */
	struct list_head	mfc_unres_queue;	/* entries awaiting resolution */
	struct list_head	mfc_cache_array[MFC_LINES];	/* resolved cache hash */
	struct vif_device	vif_table[MAXVIFS];	/* virtual interfaces */
	int			maxvif;	/* highest in-use vif index + 1 */
	atomic_t		cache_resolve_queue_len;	/* len of mfc_unres_queue */
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;	/* PIM register vif index, -1 if none */
#endif
};
91
/* fib_rule wrapper for IPMR policy rules; no extra selector fields. */
struct ipmr_rule {
	struct fib_rule		common;
};
95
/* Result of a fib_rules lookup: the matching multicast routing table. */
struct ipmr_result {
	struct mr_table		*mrt;
};
99
100 /* Big lock, protecting vif table, mrt cache and mroute socket state.
101    Note that the changes are semaphored via rtnl_lock.
102  */
103
104 static DEFINE_RWLOCK(mrt_lock);
105
106 /*
107  *      Multicast router control variables
108  */
109
110 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
111
112 /* Special spinlock for queue of unresolved entries */
113 static DEFINE_SPINLOCK(mfc_unres_lock);
114
115 /* We return to original Alan's scheme. Hash table of resolved
116    entries is changed only in process context and protected
117    with weak lock mrt_lock. Queue of unresolved entries is protected
118    with strong spinlock mfc_unres_lock.
119
120    In this case data path is free of exclusive locks at all.
121  */
122
123 static struct kmem_cache *mrt_cachep __read_mostly;
124
125 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
126 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127                          struct sk_buff *skb, struct mfc_cache *cache,
128                          int local);
129 static int ipmr_cache_report(struct mr_table *mrt,
130                              struct sk_buff *pkt, vifi_t vifi, int assert);
131 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132                               struct mfc_cache *c, struct rtmsg *rtm);
133 static void ipmr_expire_process(unsigned long arg);
134
135 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136 #define ipmr_for_each_table(mrt, net) \
137         list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140 {
141         struct mr_table *mrt;
142
143         ipmr_for_each_table(mrt, net) {
144                 if (mrt->id == id)
145                         return mrt;
146         }
147         return NULL;
148 }
149
150 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151                            struct mr_table **mrt)
152 {
153         struct ipmr_result res;
154         struct fib_lookup_arg arg = { .result = &res, };
155         int err;
156
157         err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158         if (err < 0)
159                 return err;
160         *mrt = res.mrt;
161         return 0;
162 }
163
164 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165                             int flags, struct fib_lookup_arg *arg)
166 {
167         struct ipmr_result *res = arg->result;
168         struct mr_table *mrt;
169
170         switch (rule->action) {
171         case FR_ACT_TO_TBL:
172                 break;
173         case FR_ACT_UNREACHABLE:
174                 return -ENETUNREACH;
175         case FR_ACT_PROHIBIT:
176                 return -EACCES;
177         case FR_ACT_BLACKHOLE:
178         default:
179                 return -EINVAL;
180         }
181
182         mrt = ipmr_get_table(rule->fr_net, rule->table);
183         if (mrt == NULL)
184                 return -EAGAIN;
185         res->mrt = mrt;
186         return 0;
187 }
188
/* fib_rules .match callback: IPMR rules have no selector, so every
 * rule matches every flow.
 */
static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}
193
/* Netlink attribute policy: only the generic fib_rules attributes. */
static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};
197
/* fib_rules .configure callback: nothing rule-specific to parse. */
static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
203
/* fib_rules .compare callback: no private fields, so rules with equal
 * generic attributes always compare equal.
 */
static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}
209
210 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211                           struct fib_rule_hdr *frh)
212 {
213         frh->dst_len = 0;
214         frh->src_len = 0;
215         frh->tos     = 0;
216         return 0;
217 }
218
/* Template for the per-namespace IPMR fib_rules ops; cloned by
 * fib_rules_register() in ipmr_rules_init().
 */
static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};
233
234 static int __net_init ipmr_rules_init(struct net *net)
235 {
236         struct fib_rules_ops *ops;
237         struct mr_table *mrt;
238         int err;
239
240         ops = fib_rules_register(&ipmr_rules_ops_template, net);
241         if (IS_ERR(ops))
242                 return PTR_ERR(ops);
243
244         INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246         mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247         if (mrt == NULL) {
248                 err = -ENOMEM;
249                 goto err1;
250         }
251
252         err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253         if (err < 0)
254                 goto err2;
255
256         net->ipv4.mr_rules_ops = ops;
257         return 0;
258
259 err2:
260         kfree(mrt);
261 err1:
262         fib_rules_unregister(ops);
263         return err;
264 }
265
/* Per-namespace teardown for the multi-table build: free every routing
 * table and unregister the fib_rules ops.
 */
static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	/* _safe variant: entries are deleted while walking the list */
	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del(&mrt->list);
		kfree(mrt);
	}
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
276 #else
277 #define ipmr_for_each_table(mrt, net) \
278         for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
279
/* Single-table build: there is only one table; @id is ignored. */
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}
284
/* Single-table build: every flow resolves to the one table. */
static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}
291
292 static int __net_init ipmr_rules_init(struct net *net)
293 {
294         net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
295         return net->ipv4.mrt ? 0 : -ENOMEM;
296 }
297
/* Single-table build: free the one table. */
static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
302 #endif
303
/* Return the table with id @id, creating it if it doesn't exist yet.
 * Returns NULL on allocation failure.  In the multi-table build the new
 * table is linked onto net->ipv4.mr_tables.
 */
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	/* Existing table with this id wins over creating a new one */
	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	/* Timer that reaps entries stuck in the unresolved queue */
	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;	/* no PIM register vif yet */
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}
336
337 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
338
/* Tear down a DVMRP tunnel vif: close the device, then ask the tunl0
 * driver (via its ioctl handler) to delete the matching IPIP tunnel.
 */
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		/* Rebuild the same tunnel parameters used at creation so
		 * the driver can find and remove the tunnel.
		 */
		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			/* The ioctl handler expects a user pointer; switch
			 * the address limit so a kernel pointer passes the
			 * copy_from_user() check.
			 */
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}
369
/* Create a DVMRP tunnel vif: ask the tunl0 driver to create an IPIP
 * tunnel named "dvmrp<N>", then look it up, enable multicast, relax
 * rp_filter and bring it up.  Returns the held device or NULL.
 */
static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device  *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device  *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			/* Kernel-space ioctl: see ipmr_del_tunnel() */
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			/* Multicast forwarding is asymmetric; rp_filter
			 * would drop packets on the tunnel.
			 */
			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
430
431 #ifdef CONFIG_IP_PIMSM
432
/* Transmit handler of the PIM register pseudo-device: instead of
 * sending the packet anywhere, report it whole to the user-space
 * daemon (IGMPMSG_WHOLEPKT) and consume it.
 */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->ifindex,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	/* mrt_lock guards mroute_reg_vif_num and the report path */
	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
458
/* The register device only ever transmits (into the daemon). */
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
462
463 static void reg_vif_setup(struct net_device *dev)
464 {
465         dev->type               = ARPHRD_PIMREG;
466         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
467         dev->flags              = IFF_NOARP;
468         dev->netdev_ops         = &reg_vif_netdev_ops,
469         dev->destructor         = free_netdev;
470         dev->features           |= NETIF_F_NETNS_LOCAL;
471 }
472
/* Create and register the PIM register pseudo-device for table @mrt
 * ("pimreg" for the default table, "pimreg<id>" otherwise).  Returns
 * the held, opened device or NULL.  Called under rtnl_lock.
 */
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	/* Disable rp_filter: register traffic is inherently asymmetric */
	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
522 #endif
523
524 /*
525  *      Delete a VIF entry
526  *      @notify: Set to 1, if the caller is a notifier_call
527  */
528
/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	/* Clear v->dev under the write lock so readers never see a
	 * half-torn-down vif.
	 */
	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* Shrink maxvif if we removed the highest-numbered vif */
	if (vifi+1 == mrt->maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	/* Undo the MC_FORWARDING bump done in vif_add() */
	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	/* Tunnel/register devices were created by us; queue them for
	 * unregistration unless we're already inside a netdev notifier.
	 */
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
579
/* Return a cache entry to the mfc slab cache. */
static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
584
585 /* Destroy an unresolved cache entry, killing queued skbs
586    and reporting error to netlink readers.
587  */
588
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		/* version 0 marks an skb queued on behalf of a netlink
		 * route request (a real IP header has version 4); answer
		 * the requester with an error instead of forwarding.
		 */
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}
614
615
616 /* Timer process for the unresolved queue. */
617
/* Timer process for the unresolved queue: drop entries whose daemon
 * resolution never arrived, and re-arm for the next earliest expiry.
 */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	/* Can't sleep in timer context; if the lock is contended just
	 * retry shortly instead of spinning.
	 */
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;	/* upper bound for the next re-arm */

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* Not yet expired; remember the soonest deadline */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
654
655 /* Fill oifs list. It is called under write locked mrt_lock. */
656
657 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
658                                    unsigned char *ttls)
659 {
660         int vifi;
661
662         cache->mfc_un.res.minvif = MAXVIFS;
663         cache->mfc_un.res.maxvif = 0;
664         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
665
666         for (vifi = 0; vifi < mrt->maxvif; vifi++) {
667                 if (VIF_EXISTS(mrt, vifi) &&
668                     ttls[vifi] && ttls[vifi] < 255) {
669                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
670                         if (cache->mfc_un.res.minvif > vifi)
671                                 cache->mfc_un.res.minvif = vifi;
672                         if (cache->mfc_un.res.maxvif <= vifi)
673                                 cache->mfc_un.res.maxvif = vifi + 1;
674                 }
675         }
676 }
677
/* Install a new virtual interface described by @vifc into table @mrt.
 * @mrtsock: nonzero when the request comes from the daemon socket;
 * otherwise the vif is marked VIFF_STATIC.  Returns 0 or -errno.
 * NOTE(review): @vifi is used to index vif_table without a bounds
 * check here — presumably validated (< MAXVIFS) by the setsockopt
 * caller; confirm before reuse from other paths.
 */
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	/* Obtain (and hold) the underlying device for this vif type */
	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			/* device without IPv4 config can't forward */
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	/* Balanced by the decrement in vif_delete() */
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
783
/* Look up a resolved (S,G) cache entry in the hash, or return NULL. */
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}
797
798 /*
799  *      Allocate a multicast cache entry
800  */
801 static struct mfc_cache *ipmr_cache_alloc(void)
802 {
803         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
804         if (c == NULL)
805                 return NULL;
806         c->mfc_un.res.minvif = MAXVIFS;
807         return c;
808 }
809
810 static struct mfc_cache *ipmr_cache_alloc_unres(void)
811 {
812         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
813         if (c == NULL)
814                 return NULL;
815         skb_queue_head_init(&c->mfc_un.unres.unresolved);
816         c->mfc_un.unres.expires = jiffies + 10*HZ;
817         return c;
818 }
819
820 /*
821  *      A cache entry has gone into a resolved state from queued
822  */
823
/*
 *	A cache entry has gone into a resolved state from queued:
 *	replay the skbs that were parked on the unresolved entry @uc
 *	through the now-resolved entry @c.
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		/* version 0 marks a parked netlink route request (a real
		 * IP header has version 4): answer it instead of forwarding.
		 */
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				/* route didn't fit in the skb: report error */
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}
855
856 /*
857  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
858  *      expects the following bizarre scheme.
859  *
860  *      Called under mrt_lock.
861  */
862
/* Bounce a cache query up to the daemon as an IGMP message on
 * mrt->mroute_sk.  @assert selects the report type (IGMPMSG_NOCACHE,
 * IGMPMSG_WRONGVIF, or IGMPMSG_WHOLEPKT for PIM register traffic).
 * Called under mrt_lock.  Returns 0 or -errno.
 */
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	/* WHOLEPKT reports carry the entire packet plus an extra header */
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	=	0;
	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	/* No daemon listening: nowhere to report to */
	if (mrt->mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
945
946 /*
947  *      Queue a packet for resolution. It gets locked cache entry!
948  */
949
/*
 * Park a multicast packet that has no MFC entry yet.  The packet is
 * attached to an "unresolved" cache entry for (saddr, daddr) and an
 * IGMPMSG_NOCACHE upcall is sent to mrouted so it can install a route.
 *
 * Takes ownership of @skb: it is either queued on the entry or freed.
 * Returns 0 on success, -ENOBUFS when the unresolved-entry limit or the
 * per-entry packet limit is exceeded, or the error from the upcall.
 */
static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	/* Is there already a pending entry for this (S,G) pair? */
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		/* At most 10 unresolved entries may be outstanding. */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		/* Arm the expiry timer when the queue transitions to non-empty. */
		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		/* No more than 3 packets may wait per unresolved entry. */
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
1023
1024 /*
1025  *      MFC cache manipulation by user space mroute daemon
1026  */
1027
1028 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1029 {
1030         int line;
1031         struct mfc_cache *c, *next;
1032
1033         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1034
1035         list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1036                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1037                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1038                         write_lock_bh(&mrt_lock);
1039                         list_del(&c->list);
1040                         write_unlock_bh(&mrt_lock);
1041
1042                         ipmr_cache_free(c);
1043                         return 0;
1044                 }
1045         }
1046         return -ENOENT;
1047 }
1048
/*
 * Install or update an MFC entry on behalf of user space.
 *
 * If an entry for (origin, mcastgrp) already exists, only its parent vif
 * and TTL thresholds are refreshed.  Otherwise a new entry is allocated,
 * hashed in, and any matching entry on the unresolved queue is resolved:
 * its queued packets are flushed through the new route and the stale
 * unresolved entry is freed.
 *
 * @mrtsock is non-zero when the request came from the registered mroute
 * socket; otherwise the entry is marked MFC_STATIC so table cleanup
 * preserves it.  Returns 0, -ENFILE (bad parent vif), -EINVAL (group is
 * not a multicast address) or -ENOMEM.
 */
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	/* Look for an existing entry on this hash chain. */
	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		/* Update in place under the writer lock. */
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	/* Fill the new entry before publishing it on the hash chain. */
	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* Nothing left pending: the expiry timer has no work to do. */
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		/* Flush packets queued on the unresolved entry, then free it. */
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}
1122
1123 /*
1124  *      Close the multicast socket, and clear the vif tables etc
1125  */
1126
/*
 * Flush the routing state of one table: delete all non-static vifs,
 * wipe all non-static MFC entries, and destroy anything still waiting
 * on the unresolved queue.  Entries flagged VIFF_STATIC / MFC_STATIC
 * survive.  Called under RTNL (see mrtsock_destruct), which
 * unregister_netdevice_many() relies on.
 */
static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	/* Devices collected by vif_delete() are unregistered in one batch. */
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags&MFC_STATIC)
				continue;
			/* Unlink under the writer lock, free outside it. */
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
1166
1167 static void mrtsock_destruct(struct sock *sk)
1168 {
1169         struct net *net = sock_net(sk);
1170         struct mr_table *mrt;
1171
1172         rtnl_lock();
1173         ipmr_for_each_table(mrt, net) {
1174                 if (sk == mrt->mroute_sk) {
1175                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1176
1177                         write_lock_bh(&mrt_lock);
1178                         mrt->mroute_sk = NULL;
1179                         write_unlock_bh(&mrt_lock);
1180
1181                         mroute_clean_tables(mrt);
1182                 }
1183         }
1184         rtnl_unlock();
1185 }
1186
1187 /*
1188  *      Socket options and virtual interface manipulation. The whole
1189  *      virtual interface system is a complete heap, but unfortunately
1190  *      that's how BSD mrouted happens to think. Maybe one day with a proper
1191  *      MOSPF/PIM router set up we can clean this up.
1192  */
1193
/*
 * Handle the MRT_* setsockopt interface used by the multicast routing
 * daemon.  Except for MRT_INIT, the caller must either be the table's
 * registered mroute socket or hold CAP_NET_ADMIN.
 *
 * Returns 0 on success or a negative errno.
 */
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	/* A zero ipmr_table means the default table. */
	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		/* Only a raw IGMP socket may become the mroute socket. */
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		/* Only one daemon may own a table at a time. */
		if (mrt->mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != mrt->mroute_sk)
			return -EACCES;
		/* Tear-down happens in mrtsock_destruct via ip_ra_control. */
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v,(int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		/* Enabling PIM mode implies assert mode as well. */
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* Cannot switch tables once registered as the mroute socket. */
		if (sk == mrt->mroute_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ipmr_new_table(net, v))
			ret = -ENOMEM;
		raw_sk(sk)->ipmr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
1332
1333 /*
1334  *      Getsock opt support for the multicast routing system.
1335  */
1336
1337 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1338 {
1339         int olr;
1340         int val;
1341         struct net *net = sock_net(sk);
1342         struct mr_table *mrt;
1343
1344         mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1345         if (mrt == NULL)
1346                 return -ENOENT;
1347
1348         if (optname != MRT_VERSION &&
1349 #ifdef CONFIG_IP_PIMSM
1350            optname!=MRT_PIM &&
1351 #endif
1352            optname!=MRT_ASSERT)
1353                 return -ENOPROTOOPT;
1354
1355         if (get_user(olr, optlen))
1356                 return -EFAULT;
1357
1358         olr = min_t(unsigned int, olr, sizeof(int));
1359         if (olr < 0)
1360                 return -EINVAL;
1361
1362         if (put_user(olr, optlen))
1363                 return -EFAULT;
1364         if (optname == MRT_VERSION)
1365                 val = 0x0305;
1366 #ifdef CONFIG_IP_PIMSM
1367         else if (optname == MRT_PIM)
1368                 val = mrt->mroute_do_pim;
1369 #endif
1370         else
1371                 val = mrt->mroute_do_assert;
1372         if (copy_to_user(optval, &val, olr))
1373                 return -EFAULT;
1374         return 0;
1375 }
1376
1377 /*
1378  *      The IP multicast ioctl support routines.
1379  */
1380
/*
 * ioctl interface for reading per-vif (SIOCGETVIFCNT) and per-route
 * (SIOCGETSGCNT) packet/byte counters.  Counters are sampled under
 * mrt_lock; the lock is dropped before copy_to_user(), which may fault.
 */
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			/* Snapshot counters while holding the lock. */
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			/* Snapshot counters while holding the lock. */
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1437
1438
1439 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1440 {
1441         struct net_device *dev = ptr;
1442         struct net *net = dev_net(dev);
1443         struct mr_table *mrt;
1444         struct vif_device *v;
1445         int ct;
1446         LIST_HEAD(list);
1447
1448         if (event != NETDEV_UNREGISTER)
1449                 return NOTIFY_DONE;
1450
1451         ipmr_for_each_table(mrt, net) {
1452                 v = &mrt->vif_table[0];
1453                 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1454                         if (v->dev == dev)
1455                                 vif_delete(mrt, ct, 1, &list);
1456                 }
1457         }
1458         unregister_netdevice_many(&list);
1459         return NOTIFY_DONE;
1460 }
1461
1462
/* Netdevice notifier: cleans up vifs when their device goes away. */
static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};
1466
1467 /*
1468  *      Encapsulate a packet by attaching a valid IPIP header to it.
1469  *      This avoids tunnel drivers and other mess and gives us the speed so
1470  *      important for multicast video.
1471  */
1472
1473 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1474 {
1475         struct iphdr *iph;
1476         struct iphdr *old_iph = ip_hdr(skb);
1477
1478         skb_push(skb, sizeof(struct iphdr));
1479         skb->transport_header = skb->network_header;
1480         skb_reset_network_header(skb);
1481         iph = ip_hdr(skb);
1482
1483         iph->version    =       4;
1484         iph->tos        =       old_iph->tos;
1485         iph->ttl        =       old_iph->ttl;
1486         iph->frag_off   =       0;
1487         iph->daddr      =       daddr;
1488         iph->saddr      =       saddr;
1489         iph->protocol   =       IPPROTO_IPIP;
1490         iph->ihl        =       5;
1491         iph->tot_len    =       htons(skb->len);
1492         ip_select_ident(iph, skb_dst(skb), NULL);
1493         ip_send_check(iph);
1494
1495         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1496         nf_reset(skb);
1497 }
1498
/*
 * Final step after the NF_INET_FORWARD hook (see ipmr_queue_xmit):
 * count the forwarded datagram, process IP options if present, and
 * hand the packet to the dst output path.
 */
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options * opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	/* Only touch the header when options are actually present. */
	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}
1510
1511 /*
1512  *      Processing handlers for ipmr_forward
1513  */
1514
/*
 * Transmit one multicast packet out of vif @vifi.
 *
 * Consumes @skb in all cases (sent or freed).  Register vifs bounce the
 * whole packet up to mrouted instead of transmitting; tunnel vifs are
 * routed to the remote endpoint and IPIP-encapsulated; plain vifs are
 * routed by destination.  Packets that exceed the path MTU with DF set
 * are dropped (no ICMP can be sent for multicast).
 */
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		/* Register vif: account the packet, then hand the whole
		 * thing to mrouted as an IGMPMSG_WHOLEPKT upcall. */
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		/* Route towards the tunnel endpoint; reserve room for
		 * the outer IPIP header added later by ip_encap(). */
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->dst.dev;

	if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	/* Headroom needed: link-layer header plus any dst header. */
	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* Attach the new route (takes over the rt reference). */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}
1614
1615 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1616 {
1617         int ct;
1618
1619         for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1620                 if (mrt->vif_table[ct].dev == dev)
1621                         break;
1622         }
1623         return ct;
1624 }
1625
1626 /* "local" means that we should preserve one skb (for local delivery) */
1627
/*
 * Forward a multicast packet along the MFC entry @cache.
 *
 * Packets arriving on a vif other than the entry's parent are dropped,
 * possibly after raising a rate-limited IGMPMSG_WRONGVIF assert upcall.
 * Otherwise the packet is cloned out to every vif whose TTL threshold
 * the packet passes.  "local" means the caller still needs the skb for
 * local delivery, so it must not be consumed here.
 */
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   send through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(mrt, skb->dev);

		/* Rate-limited (MFC_ASSERT_THRESH) assert upcall. */
		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	/* psend lags one vif behind the scan so the final transmit can
	 * consume the original skb instead of a clone. */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(net, mrt, skb2, cache,
							psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			/* Caller keeps skb for local delivery: clone. */
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
		} else {
			/* Last target consumes the original skb. */
			ipmr_queue_xmit(net, mrt, skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}
1712
1713
1714 /*
1715  *      Multicast packets for forwarding arrive here
1716  */
1717
/*
 * Entry point for multicast packets that may need forwarding.
 *
 * Looped-back, already-forwarded packets are only delivered locally.
 * IGMP traffic without a router-alert option is diverted to the mroute
 * socket when one is registered.  Packets with no MFC entry are queued
 * via ipmr_cache_unresolved(); otherwise they are forwarded and, when
 * the route is local (RTCF_LOCAL), also delivered to the local stack.
 */
int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	int err;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	if (!local) {
		    if (IPCB(skb)->opt.router_alert) {
			    /* Router-alert packets go through the RA chain. */
			    if (ip_call_ra_chain(skb))
				    return 0;
		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
			    /* IGMPv1 (and broken IGMPv2 implementations sort of
			       Cisco IOS <= 11.2(8)) do not put router alert
			       option to IGMP packets destined to routable
			       groups. It is very bad, because it means
			       that we can forward NO IGMP messages.
			     */
			    read_lock(&mrt_lock);
			    if (mrt->mroute_sk) {
				    nf_reset(skb);
				    /* Hand IGMP straight to the mroute socket. */
				    raw_rcv(mrt->mroute_sk, skb);
				    read_unlock(&mrt_lock);
				    return 0;
			    }
			    read_unlock(&mrt_lock);
		    }
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			/* Deliver the original locally; forward the clone. */
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			/* Queue for resolution; consumes skb either way. */
			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err2;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
1806
#ifdef CONFIG_IP_PIMSM
/*
 * Common decapsulation for PIM REGISTER packets (shared by the PIMv1
 * and PIMv2 receive paths).  The encapsulated IP packet starts @pimlen
 * bytes past the transport header.  On success the inner packet is
 * re-injected so it appears to have been received on the register vif.
 *
 * Returns 0 if the skb was consumed, 1 if the caller must free it.
 */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destinted to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	/* Look up the register vif's device under mrt_lock and pin it
	 * with dev_hold() so it cannot go away while we re-inject.
	 */
	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	/* Strip the outer IP + PIM headers and rebuild the skb metadata
	 * so the inner packet looks freshly received.
	 */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8*)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;	/* force checksum revalidation */
	skb->pkt_type = PACKET_HOST;

	skb_tunnel_rx(skb, reg_dev);

	netif_rx(skb);
	dev_put(reg_dev);	/* drop the reference taken above */

	return 0;
}
#endif
1851
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 *
 * PIMv1 REGISTER messages arrive disguised as IGMP.  Validate the
 * header, find the routing table for this packet and hand the
 * decapsulation to __pim_rcv().  Always returns 0; the skb is either
 * consumed by __pim_rcv() or freed here.
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	/* Need the IGMP header plus at least an inner IP header. */
	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	/* Only REGISTER messages, and only when the daemon enabled PIM
	 * assistance (mroute_do_pim) for this table.
	 */
	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	/* __pim_rcv() returns nonzero when the skb was NOT consumed. */
	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
1882
#ifdef CONFIG_IP_PIMSM_V2
/*
 * IPPROTO_PIM receive handler (PIMv2).  Validates the REGISTER header
 * and checksum, then hands decapsulation to __pim_rcv().  Always
 * returns 0; the skb is either consumed by __pim_rcv() or freed here.
 */
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	/* Need the PIM register header plus at least an inner IP header. */
	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	/* Accept only non-NULL REGISTER messages.  The checksum may
	 * cover just the PIM header (per spec) or the whole packet
	 * (older peers) -- accept either.
	 */
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	/* __pim_rcv() returns nonzero when the skb was NOT consumed. */
	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
1910
/*
 * Fill @skb / @rtm with the forwarding state of cache entry @c:
 * an RTA_IIF attribute for the input interface and an RTA_MULTIPATH
 * attribute holding one rtnexthop per output vif.  Callers hold
 * mrt_lock.
 *
 * Returns 1 on success, -ENOENT for unresolved entries, -EMSGSIZE when
 * the skb runs out of tailroom (partial output is rolled back).
 */
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);	/* rollback point on failure */
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent >= MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(mrt, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

	/* Reserve the nest header now; its length is patched below. */
	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		/* vifs with ttl >= 255 are excluded from the oif list */
		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);	/* drop partially written attributes */
	return -EMSGSIZE;
}
1948
/*
 * Fill @rtm with multicast forwarding info for the route attached to
 * @skb.  If there is no cache entry yet and @nowait is zero, a cloned
 * pseudo-packet is queued on the unresolved queue so the daemon can
 * resolve it; -EAGAIN is returned for @nowait, -ENODEV when no vif
 * matches skb->dev.
 */
int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		/* Build a minimal pseudo IP header on the clone; only
		 * saddr/daddr are filled in.  version = 0 presumably
		 * marks the packet as synthetic -- NOTE(review):
		 * confirm against the resolver/report path.
		 */
		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
2004
/*
 * Build one RTM_NEWROUTE dump message describing cache entry @c.
 * Returns the result of nlmsg_end() on success, -EMSGSIZE when @skb
 * has no room (the partial message is cancelled).
 */
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 pid, u32 seq, struct mfc_cache *c)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len  = 32;
	rtm->rtm_src_len  = 32;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = RTPROT_UNSPEC;
	rtm->rtm_flags    = 0;

	NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
	NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);

	/* IIF and the oif nexthop list are shared with ipmr_get_route(). */
	if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2039
/*
 * Netlink dump callback for RTM_GETROUTE (RTNL_FAMILY_IPMR): walk every
 * mr_table of the netns and emit one message per cache entry.  Resume
 * state between dump invocations is kept in cb->args:
 * [0] = table index, [1] = hash line, [2] = entry index in the line.
 */
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr_table *mrt;
	struct mfc_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ipmr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;	/* new table: restart the hash walk */
		for (h = s_h; h < MFC_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;	/* already dumped */
				if (ipmr_fill_mroute(mrt, skb,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     mfc) < 0)
					goto done;	/* skb full; resume here */
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	/* Save resume position for the next invocation. */
	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}
2086
2087 #ifdef CONFIG_PROC_FS
2088 /*
2089  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
2090  */
/* Iterator state for the /proc/net/ip_mr_vif seq_file. */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;	/* table being walked */
	int ct;			/* current index into mrt->vif_table */
};
2096
2097 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2098                                            struct ipmr_vif_iter *iter,
2099                                            loff_t pos)
2100 {
2101         struct mr_table *mrt = iter->mrt;
2102
2103         for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2104                 if (!VIF_EXISTS(mrt, iter->ct))
2105                         continue;
2106                 if (pos-- == 0)
2107                         return &mrt->vif_table[iter->ct];
2108         }
2109         return NULL;
2110 }
2111
2112 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
2113         __acquires(mrt_lock)
2114 {
2115         struct ipmr_vif_iter *iter = seq->private;
2116         struct net *net = seq_file_net(seq);
2117         struct mr_table *mrt;
2118
2119         mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2120         if (mrt == NULL)
2121                 return ERR_PTR(-ENOENT);
2122
2123         iter->mrt = mrt;
2124
2125         read_lock(&mrt_lock);
2126         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
2127                 : SEQ_START_TOKEN;
2128 }
2129
2130 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2131 {
2132         struct ipmr_vif_iter *iter = seq->private;
2133         struct net *net = seq_file_net(seq);
2134         struct mr_table *mrt = iter->mrt;
2135
2136         ++*pos;
2137         if (v == SEQ_START_TOKEN)
2138                 return ipmr_vif_seq_idx(net, iter, 0);
2139
2140         while (++iter->ct < mrt->maxvif) {
2141                 if (!VIF_EXISTS(mrt, iter->ct))
2142                         continue;
2143                 return &mrt->vif_table[iter->ct];
2144         }
2145         return NULL;
2146 }
2147
/* Drop the mrt_lock taken in ipmr_vif_seq_start(). */
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
2153
2154 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2155 {
2156         struct ipmr_vif_iter *iter = seq->private;
2157         struct mr_table *mrt = iter->mrt;
2158
2159         if (v == SEQ_START_TOKEN) {
2160                 seq_puts(seq,
2161                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
2162         } else {
2163                 const struct vif_device *vif = v;
2164                 const char *name =  vif->dev ? vif->dev->name : "none";
2165
2166                 seq_printf(seq,
2167                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
2168                            vif - mrt->vif_table,
2169                            name, vif->bytes_in, vif->pkt_in,
2170                            vif->bytes_out, vif->pkt_out,
2171                            vif->flags, vif->local, vif->remote);
2172         }
2173         return 0;
2174 }
2175
/* seq_file iterator for /proc/net/ip_mr_vif */
static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};
2182
/* open() for /proc/net/ip_mr_vif: per-netns seq_file with vif iterator state. */
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}
2188
/* file_operations for /proc/net/ip_mr_vif */
static const struct file_operations ipmr_vif_fops = {
	.owner   = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
2196
/* Iterator state for the /proc/net/ip_mr_cache seq_file. */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;		/* table being walked */
	struct list_head *cache;	/* current list: a hash line or the unresolved queue */
	int ct;				/* current hash line index */
};
2203
2204
/*
 * Position the iterator at the pos'th cache entry, walking the resolved
 * hash lines first, then the unresolved queue.
 *
 * Locking: returns with mrt_lock held when the entry came from the
 * resolved array, with mfc_unres_lock held when it came from the
 * unresolved queue, and with NO lock held (it->cache == NULL) when no
 * such entry exists.  ipmr_mfc_seq_stop() releases whichever lock is
 * still held, based on it->cache.
 */
static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
2230
2231
2232 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2233 {
2234         struct ipmr_mfc_iter *it = seq->private;
2235         struct net *net = seq_file_net(seq);
2236         struct mr_table *mrt;
2237
2238         mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2239         if (mrt == NULL)
2240                 return ERR_PTR(-ENOENT);
2241
2242         it->mrt = mrt;
2243         it->cache = NULL;
2244         it->ct = 0;
2245         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
2246                 : SEQ_START_TOKEN;
2247 }
2248
/*
 * seq_file ->next for /proc/net/ip_mr_cache: advance to the following
 * entry, migrating from the resolved hash lines to the unresolved
 * queue when the array is exhausted.  The mrt_lock -> mfc_unres_lock
 * handoff mirrors ipmr_mfc_seq_idx(); ->stop releases whichever lock
 * is still held.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* More entries on the current list? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	/* Advance to the next non-empty hash line. */
	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
2291
/*
 * seq_file ->stop: release whichever lock the iterator still holds;
 * it->cache tells us which one (see ipmr_mfc_seq_idx()).
 */
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}
2302
/*
 * Emit one /proc/net/ip_mr_cache line: group, origin and input vif,
 * plus -- for resolved entries only -- packet/byte/wrong-iif counters
 * and the "oif:ttl" list.
 */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			/* vifs with ttl >= 255 are excluded from the list */
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++ ) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
2343
/* seq_file iterator for /proc/net/ip_mr_cache */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
2350
/* open() for /proc/net/ip_mr_cache: per-netns seq_file with mfc iterator state. */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}
2356
/* file_operations for /proc/net/ip_mr_cache */
static const struct file_operations ipmr_mfc_fops = {
	.owner   = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
2364 #endif
2365
#ifdef CONFIG_IP_PIMSM_V2
/* IPPROTO_PIM handler; registered from ip_mr_init(). */
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif
2372
2373
2374 /*
2375  *      Setup for IP multicast routing
2376  */
/*
 * Per-netns init: set up the routing rules/tables and the two proc
 * entries.  On failure the goto chain unwinds in reverse order of
 * setup.
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}
2403
/* Per-netns teardown: mirror image of ipmr_net_init(). */
static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	ipmr_rules_exit(net);
}
2412
/* Registered in ip_mr_init(); runs for every network namespace. */
static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};
2417
/*
 * Subsystem init for IP multicast routing: slab cache for mfc entries,
 * per-netns state, the netdevice notifier, the optional PIMv2 protocol
 * handler and the rtnetlink dump hook.  Error paths unwind in reverse
 * order of registration.
 */
int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}