ipv6: Do not depend on rt->n in rt6_check_neigh().
[linux-3.10.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex,
91                                            unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93                                            const struct in6_addr *prefix, int prefixlen,
94                                            const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155 {
156         struct neighbour *n = __ipv6_neigh_lookup(dev, &rt->rt6i_gateway);
157         if (!n) {
158                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159                 if (IS_ERR(n))
160                         return PTR_ERR(n);
161         }
162         rt->n = n;
163
164         return 0;
165 }
166
167 static struct dst_ops ip6_dst_ops_template = {
168         .family                 =       AF_INET6,
169         .protocol               =       cpu_to_be16(ETH_P_IPV6),
170         .gc                     =       ip6_dst_gc,
171         .gc_thresh              =       1024,
172         .check                  =       ip6_dst_check,
173         .default_advmss         =       ip6_default_advmss,
174         .mtu                    =       ip6_mtu,
175         .cow_metrics            =       ipv6_cow_metrics,
176         .destroy                =       ip6_dst_destroy,
177         .ifdown                 =       ip6_dst_ifdown,
178         .negative_advice        =       ip6_negative_advice,
179         .link_failure           =       ip6_link_failure,
180         .update_pmtu            =       ip6_rt_update_pmtu,
181         .redirect               =       rt6_do_redirect,
182         .local_out              =       __ip6_local_out,
183         .neigh_lookup           =       ip6_neigh_lookup,
184 };
185
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187 {
188         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190         return mtu ? : dst->dev->mtu;
191 }
192
193 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194                                          struct sk_buff *skb, u32 mtu)
195 {
196 }
197
/* .redirect callback of ip6_dst_blackhole_ops: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
202
203 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204                                          unsigned long old)
205 {
206         return NULL;
207 }
208
209 static struct dst_ops ip6_dst_blackhole_ops = {
210         .family                 =       AF_INET6,
211         .protocol               =       cpu_to_be16(ETH_P_IPV6),
212         .destroy                =       ip6_dst_destroy,
213         .check                  =       ip6_dst_check,
214         .mtu                    =       ip6_blackhole_mtu,
215         .default_advmss         =       ip6_default_advmss,
216         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
217         .redirect               =       ip6_rt_blackhole_redirect,
218         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
219         .neigh_lookup           =       ip6_neigh_lookup,
220 };
221
222 static const u32 ip6_template_metrics[RTAX_MAX] = {
223         [RTAX_HOPLIMIT - 1] = 0,
224 };
225
226 static const struct rt6_info ip6_null_entry_template = {
227         .dst = {
228                 .__refcnt       = ATOMIC_INIT(1),
229                 .__use          = 1,
230                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
231                 .error          = -ENETUNREACH,
232                 .input          = ip6_pkt_discard,
233                 .output         = ip6_pkt_discard_out,
234         },
235         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
236         .rt6i_protocol  = RTPROT_KERNEL,
237         .rt6i_metric    = ~(u32) 0,
238         .rt6i_ref       = ATOMIC_INIT(1),
239 };
240
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Packet handlers used by the prohibit template; defined elsewhere. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route without a next
 * hop whose dst carries -EACCES and uses the ip6_pkt_prohibit{,_out}
 * handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: a reject route without a next
 * hop whose dst carries -EINVAL and uses dst_discard for both input and
 * output.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
277
278 /* allocate dst with ip6_dst_ops */
279 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280                                              struct net_device *dev,
281                                              int flags,
282                                              struct fib6_table *table)
283 {
284         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285                                         0, DST_OBSOLETE_FORCE_CHK, flags);
286
287         if (rt) {
288                 struct dst_entry *dst = &rt->dst;
289
290                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292                 rt->rt6i_genid = rt_genid(net);
293                 INIT_LIST_HEAD(&rt->rt6i_siblings);
294                 rt->rt6i_nsiblings = 0;
295         }
296         return rt;
297 }
298
299 static void ip6_dst_destroy(struct dst_entry *dst)
300 {
301         struct rt6_info *rt = (struct rt6_info *)dst;
302         struct inet6_dev *idev = rt->rt6i_idev;
303
304         if (rt->n)
305                 neigh_release(rt->n);
306
307         if (!(rt->dst.flags & DST_HOST))
308                 dst_destroy_metrics_generic(dst);
309
310         if (idev) {
311                 rt->rt6i_idev = NULL;
312                 in6_dev_put(idev);
313         }
314
315         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316                 dst_release(dst->from);
317
318         if (rt6_has_peer(rt)) {
319                 struct inet_peer *peer = rt6_peer_ptr(rt);
320                 inet_putpeer(peer);
321         }
322 }
323
324 void rt6_bind_peer(struct rt6_info *rt, int create)
325 {
326         struct inet_peer_base *base;
327         struct inet_peer *peer;
328
329         base = inetpeer_base_ptr(rt->_rt6i_peer);
330         if (!base)
331                 return;
332
333         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
334         if (peer) {
335                 if (!rt6_set_peer(rt, peer))
336                         inet_putpeer(peer);
337         }
338 }
339
340 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
341                            int how)
342 {
343         struct rt6_info *rt = (struct rt6_info *)dst;
344         struct inet6_dev *idev = rt->rt6i_idev;
345         struct net_device *loopback_dev =
346                 dev_net(dev)->loopback_dev;
347
348         if (dev != loopback_dev) {
349                 if (idev && idev->dev == dev) {
350                         struct inet6_dev *loopback_idev =
351                                 in6_dev_get(loopback_dev);
352                         if (loopback_idev) {
353                                 rt->rt6i_idev = loopback_idev;
354                                 in6_dev_put(idev);
355                         }
356                 }
357                 if (rt->n && rt->n->dev == dev) {
358                         rt->n->dev = loopback_dev;
359                         dev_hold(loopback_dev);
360                         dev_put(dev);
361                 }
362         }
363 }
364
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367         if (rt->rt6i_flags & RTF_EXPIRES) {
368                 if (time_after(jiffies, rt->dst.expires))
369                         return true;
370         } else if (rt->dst.from) {
371                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372         }
373         return false;
374 }
375
376 static bool rt6_need_strict(const struct in6_addr *daddr)
377 {
378         return ipv6_addr_type(daddr) &
379                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
380 }
381
382 /* Multipath route selection:
383  *   Hash based function using packet header and flowlabel.
384  * Adapted from fib_info_hashfn()
385  */
386 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387                                const struct flowi6 *fl6)
388 {
389         unsigned int val = fl6->flowi6_proto;
390
391         val ^= ipv6_addr_hash(&fl6->daddr);
392         val ^= ipv6_addr_hash(&fl6->saddr);
393
394         /* Work only if this not encapsulated */
395         switch (fl6->flowi6_proto) {
396         case IPPROTO_UDP:
397         case IPPROTO_TCP:
398         case IPPROTO_SCTP:
399                 val ^= (__force u16)fl6->fl6_sport;
400                 val ^= (__force u16)fl6->fl6_dport;
401                 break;
402
403         case IPPROTO_ICMPV6:
404                 val ^= (__force u16)fl6->fl6_icmp_type;
405                 val ^= (__force u16)fl6->fl6_icmp_code;
406                 break;
407         }
408         /* RFC6438 recommands to use flowlabel */
409         val ^= (__force u32)fl6->flowlabel;
410
411         /* Perhaps, we need to tune, this function? */
412         val = val ^ (val >> 7) ^ (val >> 12);
413         return val % candidate_count;
414 }
415
416 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
417                                              struct flowi6 *fl6)
418 {
419         struct rt6_info *sibling, *next_sibling;
420         int route_choosen;
421
422         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
423         /* Don't change the route, if route_choosen == 0
424          * (siblings does not include ourself)
425          */
426         if (route_choosen)
427                 list_for_each_entry_safe(sibling, next_sibling,
428                                 &match->rt6i_siblings, rt6i_siblings) {
429                         route_choosen--;
430                         if (route_choosen == 0) {
431                                 match = sibling;
432                                 break;
433                         }
434                 }
435         return match;
436 }
437
438 /*
439  *      Route lookup. Any table->tb6_lock is implied.
440  */
441
442 static inline struct rt6_info *rt6_device_match(struct net *net,
443                                                     struct rt6_info *rt,
444                                                     const struct in6_addr *saddr,
445                                                     int oif,
446                                                     int flags)
447 {
448         struct rt6_info *local = NULL;
449         struct rt6_info *sprt;
450
451         if (!oif && ipv6_addr_any(saddr))
452                 goto out;
453
454         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
455                 struct net_device *dev = sprt->dst.dev;
456
457                 if (oif) {
458                         if (dev->ifindex == oif)
459                                 return sprt;
460                         if (dev->flags & IFF_LOOPBACK) {
461                                 if (!sprt->rt6i_idev ||
462                                     sprt->rt6i_idev->dev->ifindex != oif) {
463                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
464                                                 continue;
465                                         if (local && (!oif ||
466                                                       local->rt6i_idev->dev->ifindex == oif))
467                                                 continue;
468                                 }
469                                 local = sprt;
470                         }
471                 } else {
472                         if (ipv6_chk_addr(net, saddr, dev,
473                                           flags & RT6_LOOKUP_F_IFACE))
474                                 return sprt;
475                 }
476         }
477
478         if (oif) {
479                 if (local)
480                         return local;
481
482                 if (flags & RT6_LOOKUP_F_IFACE)
483                         return net->ipv6.ip6_null_entry;
484         }
485 out:
486         return rt;
487 }
488
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour cached in @rt is not in a NUD_VALID state and more than
 * rtr_probe_interval has passed since it was last updated, stamp the
 * neighbour and send a neighbour solicitation for it to the solicited-node
 * multicast address.  Does nothing when @rt is NULL or has no neighbour.
 *
 * Original note: Router Reachability Probe MUST be rate-limited to no more
 * than one per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct in6_addr mcaddr;
	struct in6_addr *target;
	struct neighbour *neigh;
	bool send_probe = false;

	if (!rt)
		return;
	neigh = rt->n;
	if (!neigh)
		return;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		send_probe = true;
	}
	write_unlock_bh(&neigh->lock);

	if (!send_probe)
		return;

	/* The solicitation is sent after the neighbour lock is released. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
529
530 /*
531  * Default Router Selection (RFC 2461 6.3.6)
532  */
533 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
534 {
535         struct net_device *dev = rt->dst.dev;
536         if (!oif || dev->ifindex == oif)
537                 return 2;
538         if ((dev->flags & IFF_LOOPBACK) &&
539             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
540                 return 1;
541         return 0;
542 }
543
544 static inline bool rt6_check_neigh(struct rt6_info *rt)
545 {
546         struct neighbour *neigh;
547         bool ret = false;
548
549         if (rt->rt6i_flags & RTF_NONEXTHOP ||
550             !(rt->rt6i_flags & RTF_GATEWAY))
551                 return true;
552
553         rcu_read_lock_bh();
554         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
555         if (neigh) {
556                 read_lock(&neigh->lock);
557                 if (neigh->nud_state & NUD_VALID)
558                         ret = true;
559 #ifdef CONFIG_IPV6_ROUTER_PREF
560                 else if (!(neigh->nud_state & NUD_FAILED))
561                         ret = true;
562 #endif
563                 read_unlock(&neigh->lock);
564         }
565         rcu_read_unlock_bh();
566
567         return ret;
568 }
569
570 static int rt6_score_route(struct rt6_info *rt, int oif,
571                            int strict)
572 {
573         int m;
574
575         m = rt6_check_dev(rt, oif);
576         if (!m && (strict & RT6_LOOKUP_F_IFACE))
577                 return -1;
578 #ifdef CONFIG_IPV6_ROUTER_PREF
579         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
580 #endif
581         if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
582                 return -1;
583         return m;
584 }
585
586 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
587                                    int *mpri, struct rt6_info *match)
588 {
589         int m;
590
591         if (rt6_check_expired(rt))
592                 goto out;
593
594         m = rt6_score_route(rt, oif, strict);
595         if (m < 0)
596                 goto out;
597
598         if (m > *mpri) {
599                 if (strict & RT6_LOOKUP_F_REACHABLE)
600                         rt6_probe(match);
601                 *mpri = m;
602                 match = rt;
603         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
604                 rt6_probe(rt);
605         }
606
607 out:
608         return match;
609 }
610
611 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
612                                      struct rt6_info *rr_head,
613                                      u32 metric, int oif, int strict)
614 {
615         struct rt6_info *rt, *match;
616         int mpri = -1;
617
618         match = NULL;
619         for (rt = rr_head; rt && rt->rt6i_metric == metric;
620              rt = rt->dst.rt6_next)
621                 match = find_match(rt, oif, strict, &mpri, match);
622         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
623              rt = rt->dst.rt6_next)
624                 match = find_match(rt, oif, strict, &mpri, match);
625
626         return match;
627 }
628
629 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
630 {
631         struct rt6_info *match, *rt0;
632         struct net *net;
633
634         rt0 = fn->rr_ptr;
635         if (!rt0)
636                 fn->rr_ptr = rt0 = fn->leaf;
637
638         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
639
640         if (!match &&
641             (strict & RT6_LOOKUP_F_REACHABLE)) {
642                 struct rt6_info *next = rt0->dst.rt6_next;
643
644                 /* no entries matched; do round-robin */
645                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
646                         next = fn->leaf;
647
648                 if (next != rt0)
649                         fn->rr_ptr = next;
650         }
651
652         net = dev_net(rt0->dst.dev);
653         return match ? match : net->ipv6.ip6_null_entry;
654 }
655
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a route_info option.
 *
 * @dev:    device the option is associated with
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address for the resulting route
 *
 * After validating the option, the matching route is looked up.  A zero
 * lifetime deletes an existing route; a non-zero lifetime adds a missing
 * route or refreshes the preference bits of an existing one.  The route's
 * expiry is then set from the lifetime (or cleared for a non-finite
 * lifetime).
 *
 * Returns 0 on success and -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int min_length = 0;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length.
	 * NOTE(review): RFC 4191 section 2.3 appears to require length 3 for
	 * prefix_len > 64 and length >= 2 for prefix_len > 0; the minimums
	 * used here are one lower -- confirm whether this is intentional.
	 */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64)
		min_length = 2;
	else if (rinfo->prefix_len > 0)
		min_length = 1;
	if (rinfo->length < min_length)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws a route we already have. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	ip6_rt_put(rt);
	return 0;
}
#endif
728
/*
 * BACKTRACK - climb the fib6 tree after a lookup ended on the null entry.
 *
 * Must be expanded inside a function that provides the locals `rt` and
 * `fn` and the labels `restart` and `out`.  When `rt` is the namespace's
 * ip6_null_entry, `fn` is moved to its parent (or, when the parent has a
 * subtree other than `fn`, to the result of a lookup of @saddr in that
 * subtree) until either a top-level root is reached (goto out) or a node
 * with RTN_RTINFO is found (goto restart).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			fn = (FIB6_SUBTREE(pn) &&			\
			      FIB6_SUBTREE(pn) != fn) ?			\
				fib6_lookup(FIB6_SUBTREE(pn),		\
					    NULL, saddr) :		\
				pn;					\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
746
747 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
748                                              struct fib6_table *table,
749                                              struct flowi6 *fl6, int flags)
750 {
751         struct fib6_node *fn;
752         struct rt6_info *rt;
753
754         read_lock_bh(&table->tb6_lock);
755         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
756 restart:
757         rt = fn->leaf;
758         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
759         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
760                 rt = rt6_multipath_select(rt, fl6);
761         BACKTRACK(net, &fl6->saddr);
762 out:
763         dst_use(&rt->dst, jiffies);
764         read_unlock_bh(&table->tb6_lock);
765         return rt;
766
767 }
768
769 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
770                                     int flags)
771 {
772         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
773 }
774 EXPORT_SYMBOL_GPL(ip6_route_lookup);
775
776 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
777                             const struct in6_addr *saddr, int oif, int strict)
778 {
779         struct flowi6 fl6 = {
780                 .flowi6_oif = oif,
781                 .daddr = *daddr,
782         };
783         struct dst_entry *dst;
784         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
785
786         if (saddr) {
787                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
788                 flags |= RT6_LOOKUP_F_HAS_SADDR;
789         }
790
791         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
792         if (dst->error == 0)
793                 return (struct rt6_info *) dst;
794
795         dst_release(dst);
796
797         return NULL;
798 }
799
800 EXPORT_SYMBOL(rt6_lookup);
801
802 /* ip6_ins_rt is called with FREE table->tb6_lock.
803    It takes new route entry, the addition fails by any reason the
804    route is freed. In any case, if caller does not hold it, it may
805    be destroyed.
806  */
807
808 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
809 {
810         int err;
811         struct fib6_table *table;
812
813         table = rt->rt6i_table;
814         write_lock_bh(&table->tb6_lock);
815         err = fib6_add(&table->tb6_root, rt, info);
816         write_unlock_bh(&table->tb6_lock);
817
818         return err;
819 }
820
821 int ip6_ins_rt(struct rt6_info *rt)
822 {
823         struct nl_info info = {
824                 .nl_net = dev_net(rt->dst.dev),
825         };
826         return __ip6_ins_rt(rt, &info);
827 }
828
829 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
830                                       const struct in6_addr *daddr,
831                                       const struct in6_addr *saddr)
832 {
833         struct rt6_info *rt;
834
835         /*
836          *      Clone the route.
837          */
838
839         rt = ip6_rt_copy(ort, daddr);
840
841         if (rt) {
842                 int attempts = !in_softirq();
843
844                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
845                         if (ort->rt6i_dst.plen != 128 &&
846                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
847                                 rt->rt6i_flags |= RTF_ANYCAST;
848                         rt->rt6i_gateway = *daddr;
849                 }
850
851                 rt->rt6i_flags |= RTF_CACHE;
852
853 #ifdef CONFIG_IPV6_SUBTREES
854                 if (rt->rt6i_src.plen && saddr) {
855                         rt->rt6i_src.addr = *saddr;
856                         rt->rt6i_src.plen = 128;
857                 }
858 #endif
859
860         retry:
861                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
862                         struct net *net = dev_net(rt->dst.dev);
863                         int saved_rt_min_interval =
864                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
865                         int saved_rt_elasticity =
866                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
867
868                         if (attempts-- > 0) {
869                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
870                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
871
872                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
873
874                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
875                                         saved_rt_elasticity;
876                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
877                                         saved_rt_min_interval;
878                                 goto retry;
879                         }
880
881                         net_warn_ratelimited("Neighbour table overflow\n");
882                         dst_free(&rt->dst);
883                         return NULL;
884                 }
885         }
886
887         return rt;
888 }
889
890 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
891                                         const struct in6_addr *daddr)
892 {
893         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
894
895         if (rt) {
896                 rt->rt6i_flags |= RTF_CACHE;
897                 rt->n = neigh_clone(ort->n);
898         }
899         return rt;
900 }
901
/*
 * Policy-routing lookup shared by ip6_pol_route_input() and
 * ip6_pol_route_output().
 *
 * Looks up fl6 in @table, restricted by @oif and by RT6_LOOKUP_F_IFACE from
 * @flags, and returns a route on which a reference has been taken on every
 * return path.  lastuse and __use of the returned entry are updated.  The
 * result may be the namespace's ip6_null_entry.
 *
 * A selected route that is neither the null entry nor already RTF_CACHE is
 * turned into a per-destination cache entry (rt6_alloc_cow() or
 * rt6_alloc_clone()) which is inserted with ip6_ins_rt().  Up to three
 * attempts are made; after a failed insertion the lookup is repeated.
 *
 * NOTE(review): BACKTRACK is a macro defined outside this chunk.  The
 * "restart" label has no visible goto, so the macro presumably jumps to it
 * and may depend on the local names fn/rt and on the "out" label.  Check
 * the macro before renaming any of them.
 */
902 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
903                                       struct flowi6 *fl6, int flags)
904 {
905         struct fib6_node *fn;
906         struct rt6_info *rt, *nrt;
907         int strict = 0;
908         int attempts = 3;
909         int err;
            /* RT6_LOOKUP_F_REACHABLE is requested on the first pass unless
             * devconf_all->forwarding is set. */
910         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
911
912         strict |= flags & RT6_LOOKUP_F_IFACE;
913
914 relookup:
915         read_lock_bh(&table->tb6_lock);
916
917 restart_2:
918         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
919
920 restart:
921         rt = rt6_select(fn, oif, strict | reachable);
922         if (rt->rt6i_nsiblings && oif == 0)
923                 rt = rt6_multipath_select(rt, fl6);
924         BACKTRACK(net, &fl6->saddr);
925         if (rt == net->ipv6.ip6_null_entry ||
926             rt->rt6i_flags & RTF_CACHE)
927                 goto out;
928
            /* Hold rt across the window in which the table lock is dropped. */
929         dst_hold(&rt->dst);
930         read_unlock_bh(&table->tb6_lock);
931
            /* No NONEXTHOP/GATEWAY flag: copy-on-write clone.  Otherwise a
             * plain clone for non-host routes; host routes are returned as
             * they are, using the reference taken above. */
932         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
933                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
934         else if (!(rt->dst.flags & DST_HOST))
935                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
936         else
937                 goto out2;
938
939         ip6_rt_put(rt);
            /* GNU "?:" shorthand: nrt if it is non-NULL, else the null entry. */
940         rt = nrt ? : net->ipv6.ip6_null_entry;
941
942         dst_hold(&rt->dst);
943         if (nrt) {
944                 err = ip6_ins_rt(nrt);
945                 if (!err)
946                         goto out2;
947         }
948
949         if (--attempts <= 0)
950                 goto out2;
951
952         /*
953          * Race condition! In the gap, when table->tb6_lock was
954          * released someone could insert this route.  Relookup.
955          */
956         ip6_rt_put(rt);
957         goto relookup;
958
959 out:
            /* Arriving here with "reachable" still set: clear it and repeat
             * the lookup once without RT6_LOOKUP_F_REACHABLE. */
960         if (reachable) {
961                 reachable = 0;
962                 goto restart_2;
963         }
964         dst_hold(&rt->dst);
965         read_unlock_bh(&table->tb6_lock);
966 out2:
967         rt->dst.lastuse = jiffies;
968         rt->dst.__use++;
969
970         return rt;
971 }
972
973 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
974                                             struct flowi6 *fl6, int flags)
975 {
976         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
977 }
978
979 static struct dst_entry *ip6_route_input_lookup(struct net *net,
980                                                 struct net_device *dev,
981                                                 struct flowi6 *fl6, int flags)
982 {
983         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
984                 flags |= RT6_LOOKUP_F_IFACE;
985
986         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
987 }
988
989 void ip6_route_input(struct sk_buff *skb)
990 {
991         const struct ipv6hdr *iph = ipv6_hdr(skb);
992         struct net *net = dev_net(skb->dev);
993         int flags = RT6_LOOKUP_F_HAS_SADDR;
994         struct flowi6 fl6 = {
995                 .flowi6_iif = skb->dev->ifindex,
996                 .daddr = iph->daddr,
997                 .saddr = iph->saddr,
998                 .flowlabel = ip6_flowinfo(iph),
999                 .flowi6_mark = skb->mark,
1000                 .flowi6_proto = iph->nexthdr,
1001         };
1002
1003         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1004 }
1005
1006 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1007                                              struct flowi6 *fl6, int flags)
1008 {
1009         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1010 }
1011
1012 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1013                                     struct flowi6 *fl6)
1014 {
1015         int flags = 0;
1016
1017         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1018
1019         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1020                 flags |= RT6_LOOKUP_F_IFACE;
1021
1022         if (!ipv6_addr_any(&fl6->saddr))
1023                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1024         else if (sk)
1025                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1026
1027         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1028 }
1029
1030 EXPORT_SYMBOL(ip6_route_output);
1031
1032 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1033 {
1034         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1035         struct dst_entry *new = NULL;
1036
1037         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1038         if (rt) {
1039                 new = &rt->dst;
1040
1041                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1042                 rt6_init_peer(rt, net->ipv6.peers);
1043
1044                 new->__use = 1;
1045                 new->input = dst_discard;
1046                 new->output = dst_discard;
1047
1048                 if (dst_metrics_read_only(&ort->dst))
1049                         new->_metrics = ort->dst._metrics;
1050                 else
1051                         dst_copy_metrics(new, &ort->dst);
1052                 rt->rt6i_idev = ort->rt6i_idev;
1053                 if (rt->rt6i_idev)
1054                         in6_dev_hold(rt->rt6i_idev);
1055
1056                 rt->rt6i_gateway = ort->rt6i_gateway;
1057                 rt->rt6i_flags = ort->rt6i_flags;
1058                 rt6_clean_expires(rt);
1059                 rt->rt6i_metric = 0;
1060
1061                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1062 #ifdef CONFIG_IPV6_SUBTREES
1063                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1064 #endif
1065
1066                 dst_free(new);
1067         }
1068
1069         dst_release(dst_orig);
1070         return new ? new : ERR_PTR(-ENOMEM);
1071 }
1072
1073 /*
1074  *      Destination cache support functions
1075  */
1076
1077 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1078 {
1079         struct rt6_info *rt;
1080
1081         rt = (struct rt6_info *) dst;
1082
1083         /* All IPV6 dsts are created with ->obsolete set to the value
1084          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1085          * into this function always.
1086          */
1087         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1088                 return NULL;
1089
1090         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1091                 return dst;
1092
1093         return NULL;
1094 }
1095
1096 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1097 {
1098         struct rt6_info *rt = (struct rt6_info *) dst;
1099
1100         if (rt) {
1101                 if (rt->rt6i_flags & RTF_CACHE) {
1102                         if (rt6_check_expired(rt)) {
1103                                 ip6_del_rt(rt);
1104                                 dst = NULL;
1105                         }
1106                 } else {
1107                         dst_release(dst);
1108                         dst = NULL;
1109                 }
1110         }
1111         return dst;
1112 }
1113
1114 static void ip6_link_failure(struct sk_buff *skb)
1115 {
1116         struct rt6_info *rt;
1117
1118         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1119
1120         rt = (struct rt6_info *) skb_dst(skb);
1121         if (rt) {
1122                 if (rt->rt6i_flags & RTF_CACHE)
1123                         rt6_update_expires(rt, 0);
1124                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1125                         rt->rt6i_node->fn_sernum = -1;
1126         }
1127 }
1128
1129 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1130                                struct sk_buff *skb, u32 mtu)
1131 {
1132         struct rt6_info *rt6 = (struct rt6_info*)dst;
1133
1134         dst_confirm(dst);
1135         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1136                 struct net *net = dev_net(dst->dev);
1137
1138                 rt6->rt6i_flags |= RTF_MODIFIED;
1139                 if (mtu < IPV6_MIN_MTU) {
1140                         u32 features = dst_metric(dst, RTAX_FEATURES);
1141                         mtu = IPV6_MIN_MTU;
1142                         features |= RTAX_FEATURE_ALLFRAG;
1143                         dst_metric_set(dst, RTAX_FEATURES, features);
1144                 }
1145                 dst_metric_set(dst, RTAX_MTU, mtu);
1146                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1147         }
1148 }
1149
1150 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1151                      int oif, u32 mark)
1152 {
1153         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1154         struct dst_entry *dst;
1155         struct flowi6 fl6;
1156
1157         memset(&fl6, 0, sizeof(fl6));
1158         fl6.flowi6_oif = oif;
1159         fl6.flowi6_mark = mark;
1160         fl6.flowi6_flags = 0;
1161         fl6.daddr = iph->daddr;
1162         fl6.saddr = iph->saddr;
1163         fl6.flowlabel = ip6_flowinfo(iph);
1164
1165         dst = ip6_route_output(net, NULL, &fl6);
1166         if (!dst->error)
1167                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1168         dst_release(dst);
1169 }
1170 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1171
1172 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1173 {
1174         ip6_update_pmtu(skb, sock_net(sk), mtu,
1175                         sk->sk_bound_dev_if, sk->sk_mark);
1176 }
1177 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1178
1179 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1180 {
1181         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1182         struct dst_entry *dst;
1183         struct flowi6 fl6;
1184
1185         memset(&fl6, 0, sizeof(fl6));
1186         fl6.flowi6_oif = oif;
1187         fl6.flowi6_mark = mark;
1188         fl6.flowi6_flags = 0;
1189         fl6.daddr = iph->daddr;
1190         fl6.saddr = iph->saddr;
1191         fl6.flowlabel = ip6_flowinfo(iph);
1192
1193         dst = ip6_route_output(net, NULL, &fl6);
1194         if (!dst->error)
1195                 rt6_do_redirect(dst, NULL, skb);
1196         dst_release(dst);
1197 }
1198 EXPORT_SYMBOL_GPL(ip6_redirect);
1199
1200 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1201 {
1202         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1203 }
1204 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1205
1206 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1207 {
1208         struct net_device *dev = dst->dev;
1209         unsigned int mtu = dst_mtu(dst);
1210         struct net *net = dev_net(dev);
1211
1212         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1213
1214         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1215                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1216
1217         /*
1218          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1219          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1220          * IPV6_MAXPLEN is also valid and means: "any MSS,
1221          * rely only on pmtu discovery"
1222          */
1223         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1224                 mtu = IPV6_MAXPLEN;
1225         return mtu;
1226 }
1227
1228 static unsigned int ip6_mtu(const struct dst_entry *dst)
1229 {
1230         struct inet6_dev *idev;
1231         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1232
1233         if (mtu)
1234                 return mtu;
1235
1236         mtu = IPV6_MIN_MTU;
1237
1238         rcu_read_lock();
1239         idev = __in6_dev_get(dst->dev);
1240         if (idev)
1241                 mtu = idev->cnf.mtu6;
1242         rcu_read_unlock();
1243
1244         return mtu;
1245 }
1246
/*
 * Singly linked list, chained through dst->next, of the dst entries
 * created by icmp6_dst_alloc().  Every visible walk and update of the list
 * (icmp6_dst_alloc, icmp6_dst_gc, icmp6_clean_all) is done with
 * icmp6_dst_lock held via spin_lock_bh().
 */
1247 static struct dst_entry *icmp6_dst_gc_list;
1248 static DEFINE_SPINLOCK(icmp6_dst_lock);
1249
1250 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1251                                   struct neighbour *neigh,
1252                                   struct flowi6 *fl6)
1253 {
1254         struct dst_entry *dst;
1255         struct rt6_info *rt;
1256         struct inet6_dev *idev = in6_dev_get(dev);
1257         struct net *net = dev_net(dev);
1258
1259         if (unlikely(!idev))
1260                 return ERR_PTR(-ENODEV);
1261
1262         rt = ip6_dst_alloc(net, dev, 0, NULL);
1263         if (unlikely(!rt)) {
1264                 in6_dev_put(idev);
1265                 dst = ERR_PTR(-ENOMEM);
1266                 goto out;
1267         }
1268
1269         if (neigh)
1270                 neigh_hold(neigh);
1271         else {
1272                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1273                 if (IS_ERR(neigh)) {
1274                         in6_dev_put(idev);
1275                         dst_free(&rt->dst);
1276                         return ERR_CAST(neigh);
1277                 }
1278         }
1279
1280         rt->dst.flags |= DST_HOST;
1281         rt->dst.output  = ip6_output;
1282         rt->n = neigh;
1283         atomic_set(&rt->dst.__refcnt, 1);
1284         rt->rt6i_dst.addr = fl6->daddr;
1285         rt->rt6i_dst.plen = 128;
1286         rt->rt6i_idev     = idev;
1287         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1288
1289         spin_lock_bh(&icmp6_dst_lock);
1290         rt->dst.next = icmp6_dst_gc_list;
1291         icmp6_dst_gc_list = &rt->dst;
1292         spin_unlock_bh(&icmp6_dst_lock);
1293
1294         fib6_force_start_gc(net);
1295
1296         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1297
1298 out:
1299         return dst;
1300 }
1301
1302 int icmp6_dst_gc(void)
1303 {
1304         struct dst_entry *dst, **pprev;
1305         int more = 0;
1306
1307         spin_lock_bh(&icmp6_dst_lock);
1308         pprev = &icmp6_dst_gc_list;
1309
1310         while ((dst = *pprev) != NULL) {
1311                 if (!atomic_read(&dst->__refcnt)) {
1312                         *pprev = dst->next;
1313                         dst_free(dst);
1314                 } else {
1315                         pprev = &dst->next;
1316                         ++more;
1317                 }
1318         }
1319
1320         spin_unlock_bh(&icmp6_dst_lock);
1321
1322         return more;
1323 }
1324
1325 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1326                             void *arg)
1327 {
1328         struct dst_entry *dst, **pprev;
1329
1330         spin_lock_bh(&icmp6_dst_lock);
1331         pprev = &icmp6_dst_gc_list;
1332         while ((dst = *pprev) != NULL) {
1333                 struct rt6_info *rt = (struct rt6_info *) dst;
1334                 if (func(rt, arg)) {
1335                         *pprev = dst->next;
1336                         dst_free(dst);
1337                 } else {
1338                         pprev = &dst->next;
1339                 }
1340         }
1341         spin_unlock_bh(&icmp6_dst_lock);
1342 }
1343
1344 static int ip6_dst_gc(struct dst_ops *ops)
1345 {
1346         unsigned long now = jiffies;
1347         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1348         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1349         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1350         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1351         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1352         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1353         int entries;
1354
1355         entries = dst_entries_get_fast(ops);
1356         if (time_after(rt_last_gc + rt_min_interval, now) &&
1357             entries <= rt_max_size)
1358                 goto out;
1359
1360         net->ipv6.ip6_rt_gc_expire++;
1361         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1362         net->ipv6.ip6_rt_last_gc = now;
1363         entries = dst_entries_get_slow(ops);
1364         if (entries < ops->gc_thresh)
1365                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1366 out:
1367         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1368         return entries > rt_max_size;
1369 }
1370
1371 int ip6_dst_hoplimit(struct dst_entry *dst)
1372 {
1373         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1374         if (hoplimit == 0) {
1375                 struct net_device *dev = dst->dev;
1376                 struct inet6_dev *idev;
1377
1378                 rcu_read_lock();
1379                 idev = __in6_dev_get(dev);
1380                 if (idev)
1381                         hoplimit = idev->cnf.hop_limit;
1382                 else
1383                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1384                 rcu_read_unlock();
1385         }
1386         return hoplimit;
1387 }
1388 EXPORT_SYMBOL(ip6_dst_hoplimit);
1389
1390 /*
1391  *
1392  */
1393
/*
 * Create a route from the configuration in @cfg and insert it into the FIB
 * table selected by cfg->fc_table.
 *
 * Side effects on @cfg visible here: fc_metric and fc_protocol are filled
 * in with defaults when zero/unspecified, and fc_nlinfo.nl_net is
 * overwritten just before insertion.
 *
 * Returns the result of __ip6_ins_rt() on the install path.  Otherwise a
 * negative errno is returned (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM,
 * -EHOSTUNREACH, or the error from rt6_bind_neighbour()).  All failures
 * after the initial length checks go through "out", where the dev/idev
 * references and the allocated route are released.
 */
1394 int ip6_route_add(struct fib6_config *cfg)
1395 {
1396         int err;
1397         struct net *net = cfg->fc_nlinfo.nl_net;
1398         struct rt6_info *rt = NULL;
1399         struct net_device *dev = NULL;
1400         struct inet6_dev *idev = NULL;
1401         struct fib6_table *table;
1402         int addr_type;
1403
1404         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1405                 return -EINVAL;
1406 #ifndef CONFIG_IPV6_SUBTREES
1407         if (cfg->fc_src_len)
1408                 return -EINVAL;
1409 #endif
             /* Pin the requested device and its inet6_dev; both references
              * are dropped at "out" on failure. */
1410         if (cfg->fc_ifindex) {
1411                 err = -ENODEV;
1412                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1413                 if (!dev)
1414                         goto out;
1415                 idev = in6_dev_get(dev);
1416                 if (!idev)
1417                         goto out;
1418         }
1419
1420         if (cfg->fc_metric == 0)
1421                 cfg->fc_metric = IP6_RT_PRIO_USER;
1422
             /* A netlink request without NLM_F_CREATE first looks for an
              * existing table; a missing one is still created, with a
              * warning.  -ENOBUFS is returned if no table is obtained. */
1423         err = -ENOBUFS;
1424         if (cfg->fc_nlinfo.nlh &&
1425             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1426                 table = fib6_get_table(net, cfg->fc_table);
1427                 if (!table) {
1428                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1429                         table = fib6_new_table(net, cfg->fc_table);
1430                 }
1431         } else {
1432                 table = fib6_new_table(net, cfg->fc_table);
1433         }
1434
1435         if (!table)
1436                 goto out;
1437
1438         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1439
1440         if (!rt) {
1441                 err = -ENOMEM;
1442                 goto out;
1443         }
1444
1445         if (cfg->fc_flags & RTF_EXPIRES)
1446                 rt6_set_expires(rt, jiffies +
1447                                 clock_t_to_jiffies(cfg->fc_expires));
1448         else
1449                 rt6_clean_expires(rt);
1450
1451         if (cfg->fc_protocol == RTPROT_UNSPEC)
1452                 cfg->fc_protocol = RTPROT_BOOT;
1453         rt->rt6i_protocol = cfg->fc_protocol;
1454
1455         addr_type = ipv6_addr_type(&cfg->fc_dst);
1456
             /* Input handler: multicast destination, local route, or
              * forwarding for everything else. */
1457         if (addr_type & IPV6_ADDR_MULTICAST)
1458                 rt->dst.input = ip6_mc_input;
1459         else if (cfg->fc_flags & RTF_LOCAL)
1460                 rt->dst.input = ip6_input;
1461         else
1462                 rt->dst.input = ip6_forward;
1463
1464         rt->dst.output = ip6_output;
1465
1466         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1467         rt->rt6i_dst.plen = cfg->fc_dst_len;
1468         if (rt->rt6i_dst.plen == 128)
1469                rt->dst.flags |= DST_HOST;
1470
             /* Non-host routes with user supplied metrics get their own
              * zeroed metrics array. */
1471         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1472                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1473                 if (!metrics) {
1474                         err = -ENOMEM;
1475                         goto out;
1476                 }
1477                 dst_init_metrics(&rt->dst, metrics, 0);
1478         }
1479 #ifdef CONFIG_IPV6_SUBTREES
1480         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1481         rt->rt6i_src.plen = cfg->fc_src_len;
1482 #endif
1483
1484         rt->rt6i_metric = cfg->fc_metric;
1485
1486         /* We cannot add true routes via loopback here,
1487            they would result in kernel looping; promote them to reject routes
1488          */
1489         if ((cfg->fc_flags & RTF_REJECT) ||
1490             (dev && (dev->flags & IFF_LOOPBACK) &&
1491              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1492              !(cfg->fc_flags & RTF_LOCAL))) {
1493                 /* hold loopback dev/idev if we haven't done so. */
1494                 if (dev != net->loopback_dev) {
1495                         if (dev) {
1496                                 dev_put(dev);
1497                                 in6_dev_put(idev);
1498                         }
1499                         dev = net->loopback_dev;
1500                         dev_hold(dev);
1501                         idev = in6_dev_get(dev);
1502                         if (!idev) {
1503                                 err = -ENODEV;
1504                                 goto out;
1505                         }
1506                 }
1507                 rt->dst.output = ip6_pkt_discard_out;
1508                 rt->dst.input = ip6_pkt_discard;
1509                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                     /* The route type selects the error stored in the dst. */
1510                 switch (cfg->fc_type) {
1511                 case RTN_BLACKHOLE:
1512                         rt->dst.error = -EINVAL;
1513                         break;
1514                 case RTN_PROHIBIT:
1515                         rt->dst.error = -EACCES;
1516                         break;
1517                 case RTN_THROW:
1518                         rt->dst.error = -EAGAIN;
1519                         break;
1520                 default:
1521                         rt->dst.error = -ENETUNREACH;
1522                         break;
1523                 }
1524                 goto install_route;
1525         }
1526
1527         if (cfg->fc_flags & RTF_GATEWAY) {
1528                 const struct in6_addr *gw_addr;
1529                 int gwa_type;
1530
1531                 gw_addr = &cfg->fc_gateway;
1532                 rt->rt6i_gateway = *gw_addr;
1533                 gwa_type = ipv6_addr_type(gw_addr);
1534
1535                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1536                         struct rt6_info *grt;
1537
1538                         /* IPv6 strictly inhibits using not link-local
1539                            addresses as nexthop address.
1540                            Otherwise, router will not able to send redirects.
1541                            It is very good, but in some (rare!) circumstances
1542                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1543                            some exceptions. --ANK
1544                          */
1545                         err = -EINVAL;
1546                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1547                                 goto out;
1548
1549                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1550
1551                         err = -EHOSTUNREACH;
1552                         if (!grt)
1553                                 goto out;
1554                         if (dev) {
1555                                 if (dev != grt->dst.dev) {
1556                                         ip6_rt_put(grt);
1557                                         goto out;
1558                                 }
1559                         } else {
1560                                 dev = grt->dst.dev;
1561                                 idev = grt->rt6i_idev;
1562                                 dev_hold(dev);
1563                                 in6_dev_hold(grt->rt6i_idev);
1564                         }
                             /* The gateway is accepted only when the route
                              * towards it is not itself a gateway route;
                              * otherwise err stays -EHOSTUNREACH. */
1565                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1566                                 err = 0;
1567                         ip6_rt_put(grt);
1568
1569                         if (err)
1570                                 goto out;
1571                 }
1572                 err = -EINVAL;
1573                 if (!dev || (dev->flags & IFF_LOOPBACK))
1574                         goto out;
1575         }
1576
1577         err = -ENODEV;
1578         if (!dev)
1579                 goto out;
1580
1581         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1582                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1583                         err = -EINVAL;
1584                         goto out;
1585                 }
1586                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1587                 rt->rt6i_prefsrc.plen = 128;
1588         } else
1589                 rt->rt6i_prefsrc.plen = 0;
1590
1591         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1592                 err = rt6_bind_neighbour(rt, dev);
1593                 if (err)
1594                         goto out;
1595         }
1596
1597         rt->rt6i_flags = cfg->fc_flags;
1598
1599 install_route:
             /* Apply user supplied metrics: attribute type 0 is skipped,
              * types above RTAX_MAX are rejected with -EINVAL. */
1600         if (cfg->fc_mx) {
1601                 struct nlattr *nla;
1602                 int remaining;
1603
1604                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1605                         int type = nla_type(nla);
1606
1607                         if (type) {
1608                                 if (type > RTAX_MAX) {
1609                                         err = -EINVAL;
1610                                         goto out;
1611                                 }
1612
1613                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1614                         }
1615                 }
1616         }
1617
             /* The dev/idev references are not dropped on this path; the
              * pointers are stored in the route instead. */
1618         rt->dst.dev = dev;
1619         rt->rt6i_idev = idev;
1620         rt->rt6i_table = table;
1621
1622         cfg->fc_nlinfo.nl_net = dev_net(dev);
1623
1624         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1625
1626 out:
1627         if (dev)
1628                 dev_put(dev);
1629         if (idev)
1630                 in6_dev_put(idev);
1631         if (rt)
1632                 dst_free(&rt->dst);
1633         return err;
1634 }
1635
1636 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1637 {
1638         int err;
1639         struct fib6_table *table;
1640         struct net *net = dev_net(rt->dst.dev);
1641
1642         if (rt == net->ipv6.ip6_null_entry) {
1643                 err = -ENOENT;
1644                 goto out;
1645         }
1646
1647         table = rt->rt6i_table;
1648         write_lock_bh(&table->tb6_lock);
1649         err = fib6_del(rt, info);
1650         write_unlock_bh(&table->tb6_lock);
1651
1652 out:
1653         ip6_rt_put(rt);
1654         return err;
1655 }
1656
1657 int ip6_del_rt(struct rt6_info *rt)
1658 {
1659         struct nl_info info = {
1660                 .nl_net = dev_net(rt->dst.dev),
1661         };
1662         return __ip6_del_rt(rt, &info);
1663 }
1664
1665 static int ip6_route_del(struct fib6_config *cfg)
1666 {
1667         struct fib6_table *table;
1668         struct fib6_node *fn;
1669         struct rt6_info *rt;
1670         int err = -ESRCH;
1671
1672         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1673         if (!table)
1674                 return err;
1675
1676         read_lock_bh(&table->tb6_lock);
1677
1678         fn = fib6_locate(&table->tb6_root,
1679                          &cfg->fc_dst, cfg->fc_dst_len,
1680                          &cfg->fc_src, cfg->fc_src_len);
1681
1682         if (fn) {
1683                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1684                         if (cfg->fc_ifindex &&
1685                             (!rt->dst.dev ||
1686                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1687                                 continue;
1688                         if (cfg->fc_flags & RTF_GATEWAY &&
1689                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1690                                 continue;
1691                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1692                                 continue;
1693                         dst_hold(&rt->dst);
1694                         read_unlock_bh(&table->tb6_lock);
1695
1696                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1697                 }
1698         }
1699         read_unlock_bh(&table->tb6_lock);
1700
1701         return err;
1702 }
1703
1704 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1705 {
1706         struct net *net = dev_net(skb->dev);
1707         struct netevent_redirect netevent;
1708         struct rt6_info *rt, *nrt = NULL;
1709         struct ndisc_options ndopts;
1710         struct neighbour *old_neigh;
1711         struct inet6_dev *in6_dev;
1712         struct neighbour *neigh;
1713         struct rd_msg *msg;
1714         int optlen, on_link;
1715         u8 *lladdr;
1716
1717         optlen = skb->tail - skb->transport_header;
1718         optlen -= sizeof(*msg);
1719
1720         if (optlen < 0) {
1721                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1722                 return;
1723         }
1724
1725         msg = (struct rd_msg *)icmp6_hdr(skb);
1726
1727         if (ipv6_addr_is_multicast(&msg->dest)) {
1728                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1729                 return;
1730         }
1731
1732         on_link = 0;
1733         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1734                 on_link = 1;
1735         } else if (ipv6_addr_type(&msg->target) !=
1736                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1737                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1738                 return;
1739         }
1740
1741         in6_dev = __in6_dev_get(skb->dev);
1742         if (!in6_dev)
1743                 return;
1744         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1745                 return;
1746
1747         /* RFC2461 8.1:
1748          *      The IP source address of the Redirect MUST be the same as the current
1749          *      first-hop router for the specified ICMP Destination Address.
1750          */
1751
1752         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1753                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1754                 return;
1755         }
1756
1757         lladdr = NULL;
1758         if (ndopts.nd_opts_tgt_lladdr) {
1759                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1760                                              skb->dev);
1761                 if (!lladdr) {
1762                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1763                         return;
1764                 }
1765         }
1766
1767         rt = (struct rt6_info *) dst;
1768         if (rt == net->ipv6.ip6_null_entry) {
1769                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1770                 return;
1771         }
1772
1773         /* Redirect received -> path was valid.
1774          * Look, redirects are sent only in response to data packets,
1775          * so that this nexthop apparently is reachable. --ANK
1776          */
1777         dst_confirm(&rt->dst);
1778
1779         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1780         if (!neigh)
1781                 return;
1782
1783         /* Duplicate redirect: silently ignore. */
1784         old_neigh = rt->n;
1785         if (neigh == old_neigh)
1786                 goto out;
1787
1788         /*
1789          *      We have finally decided to accept it.
1790          */
1791
1792         neigh_update(neigh, lladdr, NUD_STALE,
1793                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1794                      NEIGH_UPDATE_F_OVERRIDE|
1795                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1796                                      NEIGH_UPDATE_F_ISROUTER))
1797                      );
1798
1799         nrt = ip6_rt_copy(rt, &msg->dest);
1800         if (!nrt)
1801                 goto out;
1802
1803         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1804         if (on_link)
1805                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1806
1807         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1808         nrt->n = neigh_clone(neigh);
1809
1810         if (ip6_ins_rt(nrt))
1811                 goto out;
1812
1813         netevent.old = &rt->dst;
1814         netevent.new = &nrt->dst;
1815         netevent.daddr = &msg->dest;
1816         netevent.neigh = neigh;
1817         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1818
1819         if (rt->rt6i_flags & RTF_CACHE) {
1820                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1821                 ip6_del_rt(rt);
1822         }
1823
1824 out:
1825         neigh_release(neigh);
1826 }
1827
1828 /*
1829  *      Misc support functions
1830  */
1831
1832 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1833                                     const struct in6_addr *dest)
1834 {
1835         struct net *net = dev_net(ort->dst.dev);
1836         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1837                                             ort->rt6i_table);
1838
1839         if (rt) {
1840                 rt->dst.input = ort->dst.input;
1841                 rt->dst.output = ort->dst.output;
1842                 rt->dst.flags |= DST_HOST;
1843
1844                 rt->rt6i_dst.addr = *dest;
1845                 rt->rt6i_dst.plen = 128;
1846                 dst_copy_metrics(&rt->dst, &ort->dst);
1847                 rt->dst.error = ort->dst.error;
1848                 rt->rt6i_idev = ort->rt6i_idev;
1849                 if (rt->rt6i_idev)
1850                         in6_dev_hold(rt->rt6i_idev);
1851                 rt->dst.lastuse = jiffies;
1852
1853                 rt->rt6i_gateway = ort->rt6i_gateway;
1854                 rt->rt6i_flags = ort->rt6i_flags;
1855                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1856                     (RTF_DEFAULT | RTF_ADDRCONF))
1857                         rt6_set_from(rt, ort);
1858                 else
1859                         rt6_clean_expires(rt);
1860                 rt->rt6i_metric = 0;
1861
1862 #ifdef CONFIG_IPV6_SUBTREES
1863                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1864 #endif
1865                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1866                 rt->rt6i_table = ort->rt6i_table;
1867         }
1868         return rt;
1869 }
1870
1871 #ifdef CONFIG_IPV6_ROUTE_INFO
1872 static struct rt6_info *rt6_get_route_info(struct net *net,
1873                                            const struct in6_addr *prefix, int prefixlen,
1874                                            const struct in6_addr *gwaddr, int ifindex)
1875 {
1876         struct fib6_node *fn;
1877         struct rt6_info *rt = NULL;
1878         struct fib6_table *table;
1879
1880         table = fib6_get_table(net, RT6_TABLE_INFO);
1881         if (!table)
1882                 return NULL;
1883
1884         read_lock_bh(&table->tb6_lock);
1885         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1886         if (!fn)
1887                 goto out;
1888
1889         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1890                 if (rt->dst.dev->ifindex != ifindex)
1891                         continue;
1892                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1893                         continue;
1894                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1895                         continue;
1896                 dst_hold(&rt->dst);
1897                 break;
1898         }
1899 out:
1900         read_unlock_bh(&table->tb6_lock);
1901         return rt;
1902 }
1903
1904 static struct rt6_info *rt6_add_route_info(struct net *net,
1905                                            const struct in6_addr *prefix, int prefixlen,
1906                                            const struct in6_addr *gwaddr, int ifindex,
1907                                            unsigned int pref)
1908 {
1909         struct fib6_config cfg = {
1910                 .fc_table       = RT6_TABLE_INFO,
1911                 .fc_metric      = IP6_RT_PRIO_USER,
1912                 .fc_ifindex     = ifindex,
1913                 .fc_dst_len     = prefixlen,
1914                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1915                                   RTF_UP | RTF_PREF(pref),
1916                 .fc_nlinfo.portid = 0,
1917                 .fc_nlinfo.nlh = NULL,
1918                 .fc_nlinfo.nl_net = net,
1919         };
1920
1921         cfg.fc_dst = *prefix;
1922         cfg.fc_gateway = *gwaddr;
1923
1924         /* We should treat it as a default route if prefix length is 0. */
1925         if (!prefixlen)
1926                 cfg.fc_flags |= RTF_DEFAULT;
1927
1928         ip6_route_add(&cfg);
1929
1930         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1931 }
1932 #endif
1933
1934 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1935 {
1936         struct rt6_info *rt;
1937         struct fib6_table *table;
1938
1939         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1940         if (!table)
1941                 return NULL;
1942
1943         read_lock_bh(&table->tb6_lock);
1944         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1945                 if (dev == rt->dst.dev &&
1946                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1947                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1948                         break;
1949         }
1950         if (rt)
1951                 dst_hold(&rt->dst);
1952         read_unlock_bh(&table->tb6_lock);
1953         return rt;
1954 }
1955
1956 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1957                                      struct net_device *dev,
1958                                      unsigned int pref)
1959 {
1960         struct fib6_config cfg = {
1961                 .fc_table       = RT6_TABLE_DFLT,
1962                 .fc_metric      = IP6_RT_PRIO_USER,
1963                 .fc_ifindex     = dev->ifindex,
1964                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1965                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1966                 .fc_nlinfo.portid = 0,
1967                 .fc_nlinfo.nlh = NULL,
1968                 .fc_nlinfo.nl_net = dev_net(dev),
1969         };
1970
1971         cfg.fc_gateway = *gwaddr;
1972
1973         ip6_route_add(&cfg);
1974
1975         return rt6_get_dflt_router(gwaddr, dev);
1976 }
1977
1978 void rt6_purge_dflt_routers(struct net *net)
1979 {
1980         struct rt6_info *rt;
1981         struct fib6_table *table;
1982
1983         /* NOTE: Keep consistent with rt6_get_dflt_router */
1984         table = fib6_get_table(net, RT6_TABLE_DFLT);
1985         if (!table)
1986                 return;
1987
1988 restart:
1989         read_lock_bh(&table->tb6_lock);
1990         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1991                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1992                         dst_hold(&rt->dst);
1993                         read_unlock_bh(&table->tb6_lock);
1994                         ip6_del_rt(rt);
1995                         goto restart;
1996                 }
1997         }
1998         read_unlock_bh(&table->tb6_lock);
1999 }
2000
2001 static void rtmsg_to_fib6_config(struct net *net,
2002                                  struct in6_rtmsg *rtmsg,
2003                                  struct fib6_config *cfg)
2004 {
2005         memset(cfg, 0, sizeof(*cfg));
2006
2007         cfg->fc_table = RT6_TABLE_MAIN;
2008         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2009         cfg->fc_metric = rtmsg->rtmsg_metric;
2010         cfg->fc_expires = rtmsg->rtmsg_info;
2011         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2012         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2013         cfg->fc_flags = rtmsg->rtmsg_flags;
2014
2015         cfg->fc_nlinfo.nl_net = net;
2016
2017         cfg->fc_dst = rtmsg->rtmsg_dst;
2018         cfg->fc_src = rtmsg->rtmsg_src;
2019         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2020 }
2021
2022 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2023 {
2024         struct fib6_config cfg;
2025         struct in6_rtmsg rtmsg;
2026         int err;
2027
2028         switch(cmd) {
2029         case SIOCADDRT:         /* Add a route */
2030         case SIOCDELRT:         /* Delete a route */
2031                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2032                         return -EPERM;
2033                 err = copy_from_user(&rtmsg, arg,
2034                                      sizeof(struct in6_rtmsg));
2035                 if (err)
2036                         return -EFAULT;
2037
2038                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2039
2040                 rtnl_lock();
2041                 switch (cmd) {
2042                 case SIOCADDRT:
2043                         err = ip6_route_add(&cfg);
2044                         break;
2045                 case SIOCDELRT:
2046                         err = ip6_route_del(&cfg);
2047                         break;
2048                 default:
2049                         err = -EINVAL;
2050                 }
2051                 rtnl_unlock();
2052
2053                 return err;
2054         }
2055
2056         return -EINVAL;
2057 }
2058
2059 /*
2060  *      Drop the packet on the floor
2061  */
2062
2063 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2064 {
2065         int type;
2066         struct dst_entry *dst = skb_dst(skb);
2067         switch (ipstats_mib_noroutes) {
2068         case IPSTATS_MIB_INNOROUTES:
2069                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2070                 if (type == IPV6_ADDR_ANY) {
2071                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2072                                       IPSTATS_MIB_INADDRERRORS);
2073                         break;
2074                 }
2075                 /* FALLTHROUGH */
2076         case IPSTATS_MIB_OUTNOROUTES:
2077                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2078                               ipstats_mib_noroutes);
2079                 break;
2080         }
2081         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2082         kfree_skb(skb);
2083         return 0;
2084 }
2085
2086 static int ip6_pkt_discard(struct sk_buff *skb)
2087 {
2088         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2089 }
2090
2091 static int ip6_pkt_discard_out(struct sk_buff *skb)
2092 {
2093         skb->dev = skb_dst(skb)->dev;
2094         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2095 }
2096
2097 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2098
2099 static int ip6_pkt_prohibit(struct sk_buff *skb)
2100 {
2101         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2102 }
2103
2104 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2105 {
2106         skb->dev = skb_dst(skb)->dev;
2107         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2108 }
2109
2110 #endif
2111
2112 /*
2113  *      Allocate a dst for local (unicast / anycast) address.
2114  */
2115
2116 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2117                                     const struct in6_addr *addr,
2118                                     bool anycast)
2119 {
2120         struct net *net = dev_net(idev->dev);
2121         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2122         int err;
2123
2124         if (!rt) {
2125                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2126                 return ERR_PTR(-ENOMEM);
2127         }
2128
2129         in6_dev_hold(idev);
2130
2131         rt->dst.flags |= DST_HOST;
2132         rt->dst.input = ip6_input;
2133         rt->dst.output = ip6_output;
2134         rt->rt6i_idev = idev;
2135
2136         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2137         if (anycast)
2138                 rt->rt6i_flags |= RTF_ANYCAST;
2139         else
2140                 rt->rt6i_flags |= RTF_LOCAL;
2141         err = rt6_bind_neighbour(rt, rt->dst.dev);
2142         if (err) {
2143                 dst_free(&rt->dst);
2144                 return ERR_PTR(err);
2145         }
2146
2147         rt->rt6i_dst.addr = *addr;
2148         rt->rt6i_dst.plen = 128;
2149         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2150
2151         atomic_set(&rt->dst.__refcnt, 1);
2152
2153         return rt;
2154 }
2155
2156 int ip6_route_get_saddr(struct net *net,
2157                         struct rt6_info *rt,
2158                         const struct in6_addr *daddr,
2159                         unsigned int prefs,
2160                         struct in6_addr *saddr)
2161 {
2162         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2163         int err = 0;
2164         if (rt->rt6i_prefsrc.plen)
2165                 *saddr = rt->rt6i_prefsrc.addr;
2166         else
2167                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2168                                          daddr, prefs, saddr);
2169         return err;
2170 }
2171
/*
 * remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() for each visited route.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace, used to spot the null entry */
	struct in6_addr *addr;		/* address being removed */
};
2178
2179 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2180 {
2181         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2182         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2183         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2184
2185         if (((void *)rt->dst.dev == dev || !dev) &&
2186             rt != net->ipv6.ip6_null_entry &&
2187             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2188                 /* remove prefsrc entry */
2189                 rt->rt6i_prefsrc.plen = 0;
2190         }
2191         return 0;
2192 }
2193
2194 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2195 {
2196         struct net *net = dev_net(ifp->idev->dev);
2197         struct arg_dev_net_ip adni = {
2198                 .dev = ifp->idev->dev,
2199                 .net = net,
2200                 .addr = &ifp->addr,
2201         };
2202         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2203 }
2204
/* Argument bundle handed to fib6_ifdown() for each visited route. */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace, used to spot the null entry */
};
2209
2210 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2211 {
2212         const struct arg_dev_net *adn = arg;
2213         const struct net_device *dev = adn->dev;
2214
2215         if ((rt->dst.dev == dev || !dev) &&
2216             rt != adn->net->ipv6.ip6_null_entry)
2217                 return -1;
2218
2219         return 0;
2220 }
2221
2222 void rt6_ifdown(struct net *net, struct net_device *dev)
2223 {
2224         struct arg_dev_net adn = {
2225                 .dev = dev,
2226                 .net = net,
2227         };
2228
2229         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2230         icmp6_clean_all(fib6_ifdown, &adn);
2231 }
2232
/* Argument bundle handed to rt6_mtu_change_route() for each visited route. */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2237
2238 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2239 {
2240         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2241         struct inet6_dev *idev;
2242
2243         /* In IPv6 pmtu discovery is not optional,
2244            so that RTAX_MTU lock cannot disable it.
2245            We still use this lock to block changes
2246            caused by addrconf/ndisc.
2247         */
2248
2249         idev = __in6_dev_get(arg->dev);
2250         if (!idev)
2251                 return 0;
2252
2253         /* For administrative MTU increase, there is no way to discover
2254            IPv6 PMTU increase, so PMTU increase should be updated here.
2255            Since RFC 1981 doesn't include administrative MTU increase
2256            update PMTU increase is a MUST. (i.e. jumbo frame)
2257          */
2258         /*
2259            If new MTU is less than route PMTU, this new MTU will be the
2260            lowest MTU in the path, update the route PMTU to reflect PMTU
2261            decreases; if new MTU is greater than route PMTU, and the
2262            old MTU is the lowest MTU in the path, update the route PMTU
2263            to reflect the increase. In this case if the other nodes' MTU
2264            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2265            PMTU discouvery.
2266          */
2267         if (rt->dst.dev == arg->dev &&
2268             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2269             (dst_mtu(&rt->dst) >= arg->mtu ||
2270              (dst_mtu(&rt->dst) < arg->mtu &&
2271               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2272                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2273         }
2274         return 0;
2275 }
2276
2277 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2278 {
2279         struct rt6_mtu_change_arg arg = {
2280                 .dev = dev,
2281                 .mtu = mtu,
2282         };
2283
2284         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2285 }
2286
2287 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2288         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2289         [RTA_OIF]               = { .type = NLA_U32 },
2290         [RTA_IIF]               = { .type = NLA_U32 },
2291         [RTA_PRIORITY]          = { .type = NLA_U32 },
2292         [RTA_METRICS]           = { .type = NLA_NESTED },
2293         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2294 };
2295
2296 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2297                               struct fib6_config *cfg)
2298 {
2299         struct rtmsg *rtm;
2300         struct nlattr *tb[RTA_MAX+1];
2301         int err;
2302
2303         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2304         if (err < 0)
2305                 goto errout;
2306
2307         err = -EINVAL;
2308         rtm = nlmsg_data(nlh);
2309         memset(cfg, 0, sizeof(*cfg));
2310
2311         cfg->fc_table = rtm->rtm_table;
2312         cfg->fc_dst_len = rtm->rtm_dst_len;
2313         cfg->fc_src_len = rtm->rtm_src_len;
2314         cfg->fc_flags = RTF_UP;
2315         cfg->fc_protocol = rtm->rtm_protocol;
2316         cfg->fc_type = rtm->rtm_type;
2317
2318         if (rtm->rtm_type == RTN_UNREACHABLE ||
2319             rtm->rtm_type == RTN_BLACKHOLE ||
2320             rtm->rtm_type == RTN_PROHIBIT ||
2321             rtm->rtm_type == RTN_THROW)
2322                 cfg->fc_flags |= RTF_REJECT;
2323
2324         if (rtm->rtm_type == RTN_LOCAL)
2325                 cfg->fc_flags |= RTF_LOCAL;
2326
2327         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2328         cfg->fc_nlinfo.nlh = nlh;
2329         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2330
2331         if (tb[RTA_GATEWAY]) {
2332                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2333                 cfg->fc_flags |= RTF_GATEWAY;
2334         }
2335
2336         if (tb[RTA_DST]) {
2337                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2338
2339                 if (nla_len(tb[RTA_DST]) < plen)
2340                         goto errout;
2341
2342                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2343         }
2344
2345         if (tb[RTA_SRC]) {
2346                 int plen = (rtm->rtm_src_len + 7) >> 3;
2347
2348                 if (nla_len(tb[RTA_SRC]) < plen)
2349                         goto errout;
2350
2351                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2352         }
2353
2354         if (tb[RTA_PREFSRC])
2355                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2356
2357         if (tb[RTA_OIF])
2358                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2359
2360         if (tb[RTA_PRIORITY])
2361                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2362
2363         if (tb[RTA_METRICS]) {
2364                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2365                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2366         }
2367
2368         if (tb[RTA_TABLE])
2369                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2370
2371         if (tb[RTA_MULTIPATH]) {
2372                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2373                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2374         }
2375
2376         err = 0;
2377 errout:
2378         return err;
2379 }
2380
2381 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2382 {
2383         struct fib6_config r_cfg;
2384         struct rtnexthop *rtnh;
2385         int remaining;
2386         int attrlen;
2387         int err = 0, last_err = 0;
2388
2389 beginning:
2390         rtnh = (struct rtnexthop *)cfg->fc_mp;
2391         remaining = cfg->fc_mp_len;
2392
2393         /* Parse a Multipath Entry */
2394         while (rtnh_ok(rtnh, remaining)) {
2395                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2396                 if (rtnh->rtnh_ifindex)
2397                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2398
2399                 attrlen = rtnh_attrlen(rtnh);
2400                 if (attrlen > 0) {
2401                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2402
2403                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2404                         if (nla) {
2405                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2406                                 r_cfg.fc_flags |= RTF_GATEWAY;
2407                         }
2408                 }
2409                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2410                 if (err) {
2411                         last_err = err;
2412                         /* If we are trying to remove a route, do not stop the
2413                          * loop when ip6_route_del() fails (because next hop is
2414                          * already gone), we should try to remove all next hops.
2415                          */
2416                         if (add) {
2417                                 /* If add fails, we should try to delete all
2418                                  * next hops that have been already added.
2419                                  */
2420                                 add = 0;
2421                                 goto beginning;
2422                         }
2423                 }
2424                 /* Because each route is added like a single route we remove
2425                  * this flag after the first nexthop (if there is a collision,
2426                  * we have already fail to add the first nexthop:
2427                  * fib6_add_rt2node() has reject it).
2428                  */
2429                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2430                 rtnh = rtnh_next(rtnh, &remaining);
2431         }
2432
2433         return last_err;
2434 }
2435
2436 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2437 {
2438         struct fib6_config cfg;
2439         int err;
2440
2441         err = rtm_to_fib6_config(skb, nlh, &cfg);
2442         if (err < 0)
2443                 return err;
2444
2445         if (cfg.fc_mp)
2446                 return ip6_route_multipath(&cfg, 0);
2447         else
2448                 return ip6_route_del(&cfg);
2449 }
2450
2451 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2452 {
2453         struct fib6_config cfg;
2454         int err;
2455
2456         err = rtm_to_fib6_config(skb, nlh, &cfg);
2457         if (err < 0)
2458                 return err;
2459
2460         if (cfg.fc_mp)
2461                 return ip6_route_multipath(&cfg, 1);
2462         else
2463                 return ip6_route_add(&cfg);
2464 }
2465
2466 static inline size_t rt6_nlmsg_size(void)
2467 {
2468         return NLMSG_ALIGN(sizeof(struct rtmsg))
2469                + nla_total_size(16) /* RTA_SRC */
2470                + nla_total_size(16) /* RTA_DST */
2471                + nla_total_size(16) /* RTA_GATEWAY */
2472                + nla_total_size(16) /* RTA_PREFSRC */
2473                + nla_total_size(4) /* RTA_TABLE */
2474                + nla_total_size(4) /* RTA_IIF */
2475                + nla_total_size(4) /* RTA_OIF */
2476                + nla_total_size(4) /* RTA_PRIORITY */
2477                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2478                + nla_total_size(sizeof(struct rta_cacheinfo));
2479 }
2480
2481 static int rt6_fill_node(struct net *net,
2482                          struct sk_buff *skb, struct rt6_info *rt,
2483                          struct in6_addr *dst, struct in6_addr *src,
2484                          int iif, int type, u32 portid, u32 seq,
2485                          int prefix, int nowait, unsigned int flags)
2486 {
2487         struct rtmsg *rtm;
2488         struct nlmsghdr *nlh;
2489         long expires;
2490         u32 table;
2491
2492         if (prefix) {   /* user wants prefix routes only */
2493                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2494                         /* success since this is not a prefix route */
2495                         return 1;
2496                 }
2497         }
2498
2499         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2500         if (!nlh)
2501                 return -EMSGSIZE;
2502
2503         rtm = nlmsg_data(nlh);
2504         rtm->rtm_family = AF_INET6;
2505         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2506         rtm->rtm_src_len = rt->rt6i_src.plen;
2507         rtm->rtm_tos = 0;
2508         if (rt->rt6i_table)
2509                 table = rt->rt6i_table->tb6_id;
2510         else
2511                 table = RT6_TABLE_UNSPEC;
2512         rtm->rtm_table = table;
2513         if (nla_put_u32(skb, RTA_TABLE, table))
2514                 goto nla_put_failure;
2515         if (rt->rt6i_flags & RTF_REJECT) {
2516                 switch (rt->dst.error) {
2517                 case -EINVAL:
2518                         rtm->rtm_type = RTN_BLACKHOLE;
2519                         break;
2520                 case -EACCES:
2521                         rtm->rtm_type = RTN_PROHIBIT;
2522                         break;
2523                 case -EAGAIN:
2524                         rtm->rtm_type = RTN_THROW;
2525                         break;
2526                 default:
2527                         rtm->rtm_type = RTN_UNREACHABLE;
2528                         break;
2529                 }
2530         }
2531         else if (rt->rt6i_flags & RTF_LOCAL)
2532                 rtm->rtm_type = RTN_LOCAL;
2533         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2534                 rtm->rtm_type = RTN_LOCAL;
2535         else
2536                 rtm->rtm_type = RTN_UNICAST;
2537         rtm->rtm_flags = 0;
2538         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2539         rtm->rtm_protocol = rt->rt6i_protocol;
2540         if (rt->rt6i_flags & RTF_DYNAMIC)
2541                 rtm->rtm_protocol = RTPROT_REDIRECT;
2542         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2543                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2544                         rtm->rtm_protocol = RTPROT_RA;
2545                 else
2546                         rtm->rtm_protocol = RTPROT_KERNEL;
2547         }
2548
2549         if (rt->rt6i_flags & RTF_CACHE)
2550                 rtm->rtm_flags |= RTM_F_CLONED;
2551
2552         if (dst) {
2553                 if (nla_put(skb, RTA_DST, 16, dst))
2554                         goto nla_put_failure;
2555                 rtm->rtm_dst_len = 128;
2556         } else if (rtm->rtm_dst_len)
2557                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2558                         goto nla_put_failure;
2559 #ifdef CONFIG_IPV6_SUBTREES
2560         if (src) {
2561                 if (nla_put(skb, RTA_SRC, 16, src))
2562                         goto nla_put_failure;
2563                 rtm->rtm_src_len = 128;
2564         } else if (rtm->rtm_src_len &&
2565                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2566                 goto nla_put_failure;
2567 #endif
2568         if (iif) {
2569 #ifdef CONFIG_IPV6_MROUTE
2570                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2571                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2572                         if (err <= 0) {
2573                                 if (!nowait) {
2574                                         if (err == 0)
2575                                                 return 0;
2576                                         goto nla_put_failure;
2577                                 } else {
2578                                         if (err == -EMSGSIZE)
2579                                                 goto nla_put_failure;
2580                                 }
2581                         }
2582                 } else
2583 #endif
2584                         if (nla_put_u32(skb, RTA_IIF, iif))
2585                                 goto nla_put_failure;
2586         } else if (dst) {
2587                 struct in6_addr saddr_buf;
2588                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2589                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2590                         goto nla_put_failure;
2591         }
2592
2593         if (rt->rt6i_prefsrc.plen) {
2594                 struct in6_addr saddr_buf;
2595                 saddr_buf = rt->rt6i_prefsrc.addr;
2596                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2597                         goto nla_put_failure;
2598         }
2599
2600         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2601                 goto nla_put_failure;
2602
2603         if (rt->rt6i_flags & RTF_GATEWAY) {
2604                 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2605                         goto nla_put_failure;
2606         }
2607
2608         if (rt->dst.dev &&
2609             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2610                 goto nla_put_failure;
2611         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2612                 goto nla_put_failure;
2613
2614         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2615
2616         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2617                 goto nla_put_failure;
2618
2619         return nlmsg_end(skb, nlh);
2620
2621 nla_put_failure:
2622         nlmsg_cancel(skb, nlh);
2623         return -EMSGSIZE;
2624 }
2625
2626 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2627 {
2628         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2629         int prefix;
2630
2631         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2632                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2633                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2634         } else
2635                 prefix = 0;
2636
2637         return rt6_fill_node(arg->net,
2638                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2639                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2640                      prefix, 0, NLM_F_MULTI);
2641 }
2642
2643 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2644 {
2645         struct net *net = sock_net(in_skb->sk);
2646         struct nlattr *tb[RTA_MAX+1];
2647         struct rt6_info *rt;
2648         struct sk_buff *skb;
2649         struct rtmsg *rtm;
2650         struct flowi6 fl6;
2651         int err, iif = 0, oif = 0;
2652
2653         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2654         if (err < 0)
2655                 goto errout;
2656
2657         err = -EINVAL;
2658         memset(&fl6, 0, sizeof(fl6));
2659
2660         if (tb[RTA_SRC]) {
2661                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2662                         goto errout;
2663
2664                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2665         }
2666
2667         if (tb[RTA_DST]) {
2668                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2669                         goto errout;
2670
2671                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2672         }
2673
2674         if (tb[RTA_IIF])
2675                 iif = nla_get_u32(tb[RTA_IIF]);
2676
2677         if (tb[RTA_OIF])
2678                 oif = nla_get_u32(tb[RTA_OIF]);
2679
2680         if (iif) {
2681                 struct net_device *dev;
2682                 int flags = 0;
2683
2684                 dev = __dev_get_by_index(net, iif);
2685                 if (!dev) {
2686                         err = -ENODEV;
2687                         goto errout;
2688                 }
2689
2690                 fl6.flowi6_iif = iif;
2691
2692                 if (!ipv6_addr_any(&fl6.saddr))
2693                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2694
2695                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2696                                                                flags);
2697         } else {
2698                 fl6.flowi6_oif = oif;
2699
2700                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2701         }
2702
2703         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2704         if (!skb) {
2705                 ip6_rt_put(rt);
2706                 err = -ENOBUFS;
2707                 goto errout;
2708         }
2709
2710         /* Reserve room for dummy headers, this skb can pass
2711            through good chunk of routing engine.
2712          */
2713         skb_reset_mac_header(skb);
2714         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2715
2716         skb_dst_set(skb, &rt->dst);
2717
2718         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2719                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2720                             nlh->nlmsg_seq, 0, 0, 0);
2721         if (err < 0) {
2722                 kfree_skb(skb);
2723                 goto errout;
2724         }
2725
2726         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2727 errout:
2728         return err;
2729 }
2730
2731 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2732 {
2733         struct sk_buff *skb;
2734         struct net *net = info->nl_net;
2735         u32 seq;
2736         int err;
2737
2738         err = -ENOBUFS;
2739         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2740
2741         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2742         if (!skb)
2743                 goto errout;
2744
2745         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2746                                 event, info->portid, seq, 0, 0, 0);
2747         if (err < 0) {
2748                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2749                 WARN_ON(err == -EMSGSIZE);
2750                 kfree_skb(skb);
2751                 goto errout;
2752         }
2753         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2754                     info->nlh, gfp_any());
2755         return;
2756 errout:
2757         if (err < 0)
2758                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2759 }
2760
2761 static int ip6_route_dev_notify(struct notifier_block *this,
2762                                 unsigned long event, void *data)
2763 {
2764         struct net_device *dev = (struct net_device *)data;
2765         struct net *net = dev_net(dev);
2766
2767         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2768                 net->ipv6.ip6_null_entry->dst.dev = dev;
2769                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2770 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2771                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2772                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2773                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2774                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2775 #endif
2776         }
2777
2778         return NOTIFY_OK;
2779 }
2780
2781 /*
2782  *      /proc
2783  */
2784
2785 #ifdef CONFIG_PROC_FS
2786
/*
 * Bookkeeping for a buffer-based /proc read.
 * NOTE(review): nothing in the visible part of this file references this
 * structure any more (the /proc handlers below use seq_file) - confirm
 * whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;
	int length;
	int skip;
	int len;
};
2795
2796 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2797 {
2798         struct seq_file *m = p_arg;
2799
2800         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2801
2802 #ifdef CONFIG_IPV6_SUBTREES
2803         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2804 #else
2805         seq_puts(m, "00000000000000000000000000000000 00 ");
2806 #endif
2807         if (rt->rt6i_flags & RTF_GATEWAY) {
2808                 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2809         } else {
2810                 seq_puts(m, "00000000000000000000000000000000");
2811         }
2812         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2813                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2814                    rt->dst.__use, rt->rt6i_flags,
2815                    rt->dst.dev ? rt->dst.dev->name : "");
2816         return 0;
2817 }
2818
2819 static int ipv6_route_show(struct seq_file *m, void *v)
2820 {
2821         struct net *net = (struct net *)m->private;
2822         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2823         return 0;
2824 }
2825
2826 static int ipv6_route_open(struct inode *inode, struct file *file)
2827 {
2828         return single_open_net(inode, file, ipv6_route_show);
2829 }
2830
2831 static const struct file_operations ipv6_route_proc_fops = {
2832         .owner          = THIS_MODULE,
2833         .open           = ipv6_route_open,
2834         .read           = seq_read,
2835         .llseek         = seq_lseek,
2836         .release        = single_release_net,
2837 };
2838
2839 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2840 {
2841         struct net *net = (struct net *)seq->private;
2842         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2843                    net->ipv6.rt6_stats->fib_nodes,
2844                    net->ipv6.rt6_stats->fib_route_nodes,
2845                    net->ipv6.rt6_stats->fib_rt_alloc,
2846                    net->ipv6.rt6_stats->fib_rt_entries,
2847                    net->ipv6.rt6_stats->fib_rt_cache,
2848                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2849                    net->ipv6.rt6_stats->fib_discarded_routes);
2850
2851         return 0;
2852 }
2853
2854 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2855 {
2856         return single_open_net(inode, file, rt6_stats_seq_show);
2857 }
2858
2859 static const struct file_operations rt6_stats_seq_fops = {
2860         .owner   = THIS_MODULE,
2861         .open    = rt6_stats_seq_open,
2862         .read    = seq_read,
2863         .llseek  = seq_lseek,
2864         .release = single_release_net,
2865 };
2866 #endif  /* CONFIG_PROC_FS */
2867
2868 #ifdef CONFIG_SYSCTL
2869
2870 static
2871 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2872                               void __user *buffer, size_t *lenp, loff_t *ppos)
2873 {
2874         struct net *net;
2875         int delay;
2876         if (!write)
2877                 return -EINVAL;
2878
2879         net = (struct net *)ctl->extra1;
2880         delay = net->ipv6.sysctl.flush_delay;
2881         proc_dointvec(ctl, write, buffer, lenp, ppos);
2882         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2883         return 0;
2884 }
2885
2886 ctl_table ipv6_route_table_template[] = {
2887         {
2888                 .procname       =       "flush",
2889                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2890                 .maxlen         =       sizeof(int),
2891                 .mode           =       0200,
2892                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2893         },
2894         {
2895                 .procname       =       "gc_thresh",
2896                 .data           =       &ip6_dst_ops_template.gc_thresh,
2897                 .maxlen         =       sizeof(int),
2898                 .mode           =       0644,
2899                 .proc_handler   =       proc_dointvec,
2900         },
2901         {
2902                 .procname       =       "max_size",
2903                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2904                 .maxlen         =       sizeof(int),
2905                 .mode           =       0644,
2906                 .proc_handler   =       proc_dointvec,
2907         },
2908         {
2909                 .procname       =       "gc_min_interval",
2910                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2911                 .maxlen         =       sizeof(int),
2912                 .mode           =       0644,
2913                 .proc_handler   =       proc_dointvec_jiffies,
2914         },
2915         {
2916                 .procname       =       "gc_timeout",
2917                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2918                 .maxlen         =       sizeof(int),
2919                 .mode           =       0644,
2920                 .proc_handler   =       proc_dointvec_jiffies,
2921         },
2922         {
2923                 .procname       =       "gc_interval",
2924                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2925                 .maxlen         =       sizeof(int),
2926                 .mode           =       0644,
2927                 .proc_handler   =       proc_dointvec_jiffies,
2928         },
2929         {
2930                 .procname       =       "gc_elasticity",
2931                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2932                 .maxlen         =       sizeof(int),
2933                 .mode           =       0644,
2934                 .proc_handler   =       proc_dointvec,
2935         },
2936         {
2937                 .procname       =       "mtu_expires",
2938                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2939                 .maxlen         =       sizeof(int),
2940                 .mode           =       0644,
2941                 .proc_handler   =       proc_dointvec_jiffies,
2942         },
2943         {
2944                 .procname       =       "min_adv_mss",
2945                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2946                 .maxlen         =       sizeof(int),
2947                 .mode           =       0644,
2948                 .proc_handler   =       proc_dointvec,
2949         },
2950         {
2951                 .procname       =       "gc_min_interval_ms",
2952                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2953                 .maxlen         =       sizeof(int),
2954                 .mode           =       0644,
2955                 .proc_handler   =       proc_dointvec_ms_jiffies,
2956         },
2957         { }
2958 };
2959
2960 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2961 {
2962         struct ctl_table *table;
2963
2964         table = kmemdup(ipv6_route_table_template,
2965                         sizeof(ipv6_route_table_template),
2966                         GFP_KERNEL);
2967
2968         if (table) {
2969                 table[0].data = &net->ipv6.sysctl.flush_delay;
2970                 table[0].extra1 = net;
2971                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2972                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2973                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2974                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2975                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2976                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2977                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2978                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2979                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2980
2981                 /* Don't export sysctls to unprivileged users */
2982                 if (net->user_ns != &init_user_ns)
2983                         table[0].procname = NULL;
2984         }
2985
2986         return table;
2987 }
2988 #endif
2989
2990 static int __net_init ip6_route_net_init(struct net *net)
2991 {
2992         int ret = -ENOMEM;
2993
2994         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2995                sizeof(net->ipv6.ip6_dst_ops));
2996
2997         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2998                 goto out_ip6_dst_ops;
2999
3000         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3001                                            sizeof(*net->ipv6.ip6_null_entry),
3002                                            GFP_KERNEL);
3003         if (!net->ipv6.ip6_null_entry)
3004                 goto out_ip6_dst_entries;
3005         net->ipv6.ip6_null_entry->dst.path =
3006                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3007         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3008         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3009                          ip6_template_metrics, true);
3010
3011 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3012         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3013                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3014                                                GFP_KERNEL);
3015         if (!net->ipv6.ip6_prohibit_entry)
3016                 goto out_ip6_null_entry;
3017         net->ipv6.ip6_prohibit_entry->dst.path =
3018                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3019         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3020         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3021                          ip6_template_metrics, true);
3022
3023         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3024                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3025                                                GFP_KERNEL);
3026         if (!net->ipv6.ip6_blk_hole_entry)
3027                 goto out_ip6_prohibit_entry;
3028         net->ipv6.ip6_blk_hole_entry->dst.path =
3029                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3030         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3031         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3032                          ip6_template_metrics, true);
3033 #endif
3034
3035         net->ipv6.sysctl.flush_delay = 0;
3036         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3037         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3038         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3039         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3040         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3041         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3042         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3043
3044         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3045
3046         ret = 0;
3047 out:
3048         return ret;
3049
3050 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3051 out_ip6_prohibit_entry:
3052         kfree(net->ipv6.ip6_prohibit_entry);
3053 out_ip6_null_entry:
3054         kfree(net->ipv6.ip6_null_entry);
3055 #endif
3056 out_ip6_dst_entries:
3057         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3058 out_ip6_dst_ops:
3059         goto out;
3060 }
3061
3062 static void __net_exit ip6_route_net_exit(struct net *net)
3063 {
3064         kfree(net->ipv6.ip6_null_entry);
3065 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3066         kfree(net->ipv6.ip6_prohibit_entry);
3067         kfree(net->ipv6.ip6_blk_hole_entry);
3068 #endif
3069         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3070 }
3071
3072 static int __net_init ip6_route_net_init_late(struct net *net)
3073 {
3074 #ifdef CONFIG_PROC_FS
3075         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3076         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3077 #endif
3078         return 0;
3079 }
3080
3081 static void __net_exit ip6_route_net_exit_late(struct net *net)
3082 {
3083 #ifdef CONFIG_PROC_FS
3084         proc_net_remove(net, "ipv6_route");
3085         proc_net_remove(net, "rt6_stats");
3086 #endif
3087 }
3088
3089 static struct pernet_operations ip6_route_net_ops = {
3090         .init = ip6_route_net_init,
3091         .exit = ip6_route_net_exit,
3092 };
3093
3094 static int __net_init ipv6_inetpeer_init(struct net *net)
3095 {
3096         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3097
3098         if (!bp)
3099                 return -ENOMEM;
3100         inet_peer_base_init(bp);
3101         net->ipv6.peers = bp;
3102         return 0;
3103 }
3104
3105 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3106 {
3107         struct inet_peer_base *bp = net->ipv6.peers;
3108
3109         net->ipv6.peers = NULL;
3110         inetpeer_invalidate_tree(bp);
3111         kfree(bp);
3112 }
3113
3114 static struct pernet_operations ipv6_inetpeer_ops = {
3115         .init   =       ipv6_inetpeer_init,
3116         .exit   =       ipv6_inetpeer_exit,
3117 };
3118
3119 static struct pernet_operations ip6_route_net_late_ops = {
3120         .init = ip6_route_net_init_late,
3121         .exit = ip6_route_net_exit_late,
3122 };
3123
3124 static struct notifier_block ip6_route_dev_notifier = {
3125         .notifier_call = ip6_route_dev_notify,
3126         .priority = 0,
3127 };
3128
3129 int __init ip6_route_init(void)
3130 {
3131         int ret;
3132
3133         ret = -ENOMEM;
3134         ip6_dst_ops_template.kmem_cachep =
3135                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3136                                   SLAB_HWCACHE_ALIGN, NULL);
3137         if (!ip6_dst_ops_template.kmem_cachep)
3138                 goto out;
3139
3140         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3141         if (ret)
3142                 goto out_kmem_cache;
3143
3144         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3145         if (ret)
3146                 goto out_dst_entries;
3147
3148         ret = register_pernet_subsys(&ip6_route_net_ops);
3149         if (ret)
3150                 goto out_register_inetpeer;
3151
3152         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3153
3154         /* Registering of the loopback is done before this portion of code,
3155          * the loopback reference in rt6_info will not be taken, do it
3156          * manually for init_net */
3157         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3158         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3159   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3160         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3161         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3162         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3163         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3164   #endif
3165         ret = fib6_init();
3166         if (ret)
3167                 goto out_register_subsys;
3168
3169         ret = xfrm6_init();
3170         if (ret)
3171                 goto out_fib6_init;
3172
3173         ret = fib6_rules_init();
3174         if (ret)
3175                 goto xfrm6_init;
3176
3177         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3178         if (ret)
3179                 goto fib6_rules_init;
3180
3181         ret = -ENOBUFS;
3182         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3183             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3184             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3185                 goto out_register_late_subsys;
3186
3187         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3188         if (ret)
3189                 goto out_register_late_subsys;
3190
3191 out:
3192         return ret;
3193
3194 out_register_late_subsys:
3195         unregister_pernet_subsys(&ip6_route_net_late_ops);
3196 fib6_rules_init:
3197         fib6_rules_cleanup();
3198 xfrm6_init:
3199         xfrm6_fini();
3200 out_fib6_init:
3201         fib6_gc_cleanup();
3202 out_register_subsys:
3203         unregister_pernet_subsys(&ip6_route_net_ops);
3204 out_register_inetpeer:
3205         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3206 out_dst_entries:
3207         dst_entries_destroy(&ip6_dst_blackhole_ops);
3208 out_kmem_cache:
3209         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3210         goto out;
3211 }
3212
3213 void ip6_route_cleanup(void)
3214 {
3215         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3216         unregister_pernet_subsys(&ip6_route_net_late_ops);
3217         fib6_rules_cleanup();
3218         xfrm6_fini();
3219         fib6_gc_cleanup();
3220         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3221         unregister_pernet_subsys(&ip6_route_net_ops);
3222         dst_entries_destroy(&ip6_dst_blackhole_ops);
3223         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3224 }