ad0aa6b0b86ae02f80b6b2184588605a3d5d7a6c
[linux-3.10.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex,
91                                            unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93                                            const struct in6_addr *prefix, int prefixlen,
94                                            const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static struct dst_ops ip6_dst_ops_template = {
155         .family                 =       AF_INET6,
156         .protocol               =       cpu_to_be16(ETH_P_IPV6),
157         .gc                     =       ip6_dst_gc,
158         .gc_thresh              =       1024,
159         .check                  =       ip6_dst_check,
160         .default_advmss         =       ip6_default_advmss,
161         .mtu                    =       ip6_mtu,
162         .cow_metrics            =       ipv6_cow_metrics,
163         .destroy                =       ip6_dst_destroy,
164         .ifdown                 =       ip6_dst_ifdown,
165         .negative_advice        =       ip6_negative_advice,
166         .link_failure           =       ip6_link_failure,
167         .update_pmtu            =       ip6_rt_update_pmtu,
168         .redirect               =       rt6_do_redirect,
169         .local_out              =       __ip6_local_out,
170         .neigh_lookup           =       ip6_neigh_lookup,
171 };
172
173 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
174 {
175         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
176
177         return mtu ? : dst->dev->mtu;
178 }
179
180 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
181                                          struct sk_buff *skb, u32 mtu)
182 {
183 }
184
/* redirect callback for blackhole entries: deliberately does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
189
190 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
191                                          unsigned long old)
192 {
193         return NULL;
194 }
195
196 static struct dst_ops ip6_dst_blackhole_ops = {
197         .family                 =       AF_INET6,
198         .protocol               =       cpu_to_be16(ETH_P_IPV6),
199         .destroy                =       ip6_dst_destroy,
200         .check                  =       ip6_dst_check,
201         .mtu                    =       ip6_blackhole_mtu,
202         .default_advmss         =       ip6_default_advmss,
203         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
204         .redirect               =       ip6_rt_blackhole_redirect,
205         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
206         .neigh_lookup           =       ip6_neigh_lookup,
207 };
208
209 static const u32 ip6_template_metrics[RTAX_MAX] = {
210         [RTAX_HOPLIMIT - 1] = 0,
211 };
212
213 static const struct rt6_info ip6_null_entry_template = {
214         .dst = {
215                 .__refcnt       = ATOMIC_INIT(1),
216                 .__use          = 1,
217                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
218                 .error          = -ENETUNREACH,
219                 .input          = ip6_pkt_discard,
220                 .output         = ip6_pkt_discard_out,
221         },
222         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
223         .rt6i_protocol  = RTPROT_KERNEL,
224         .rt6i_metric    = ~(u32) 0,
225         .rt6i_ref       = ATOMIC_INIT(1),
226 };
227
228 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
229
230 static int ip6_pkt_prohibit(struct sk_buff *skb);
231 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
232
233 static const struct rt6_info ip6_prohibit_entry_template = {
234         .dst = {
235                 .__refcnt       = ATOMIC_INIT(1),
236                 .__use          = 1,
237                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
238                 .error          = -EACCES,
239                 .input          = ip6_pkt_prohibit,
240                 .output         = ip6_pkt_prohibit_out,
241         },
242         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
243         .rt6i_protocol  = RTPROT_KERNEL,
244         .rt6i_metric    = ~(u32) 0,
245         .rt6i_ref       = ATOMIC_INIT(1),
246 };
247
248 static const struct rt6_info ip6_blk_hole_entry_template = {
249         .dst = {
250                 .__refcnt       = ATOMIC_INIT(1),
251                 .__use          = 1,
252                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
253                 .error          = -EINVAL,
254                 .input          = dst_discard,
255                 .output         = dst_discard,
256         },
257         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
258         .rt6i_protocol  = RTPROT_KERNEL,
259         .rt6i_metric    = ~(u32) 0,
260         .rt6i_ref       = ATOMIC_INIT(1),
261 };
262
263 #endif
264
265 /* allocate dst with ip6_dst_ops */
266 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
267                                              struct net_device *dev,
268                                              int flags,
269                                              struct fib6_table *table)
270 {
271         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
272                                         0, DST_OBSOLETE_FORCE_CHK, flags);
273
274         if (rt) {
275                 struct dst_entry *dst = &rt->dst;
276
277                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
278                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
279                 rt->rt6i_genid = rt_genid(net);
280                 INIT_LIST_HEAD(&rt->rt6i_siblings);
281                 rt->rt6i_nsiblings = 0;
282         }
283         return rt;
284 }
285
286 static void ip6_dst_destroy(struct dst_entry *dst)
287 {
288         struct rt6_info *rt = (struct rt6_info *)dst;
289         struct inet6_dev *idev = rt->rt6i_idev;
290         struct dst_entry *from = dst->from;
291
292         if (!(rt->dst.flags & DST_HOST))
293                 dst_destroy_metrics_generic(dst);
294
295         if (idev) {
296                 rt->rt6i_idev = NULL;
297                 in6_dev_put(idev);
298         }
299
300         dst->from = NULL;
301         dst_release(from);
302
303         if (rt6_has_peer(rt)) {
304                 struct inet_peer *peer = rt6_peer_ptr(rt);
305                 inet_putpeer(peer);
306         }
307 }
308
309 void rt6_bind_peer(struct rt6_info *rt, int create)
310 {
311         struct inet_peer_base *base;
312         struct inet_peer *peer;
313
314         base = inetpeer_base_ptr(rt->_rt6i_peer);
315         if (!base)
316                 return;
317
318         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
319         if (peer) {
320                 if (!rt6_set_peer(rt, peer))
321                         inet_putpeer(peer);
322         }
323 }
324
325 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
326                            int how)
327 {
328         struct rt6_info *rt = (struct rt6_info *)dst;
329         struct inet6_dev *idev = rt->rt6i_idev;
330         struct net_device *loopback_dev =
331                 dev_net(dev)->loopback_dev;
332
333         if (dev != loopback_dev) {
334                 if (idev && idev->dev == dev) {
335                         struct inet6_dev *loopback_idev =
336                                 in6_dev_get(loopback_dev);
337                         if (loopback_idev) {
338                                 rt->rt6i_idev = loopback_idev;
339                                 in6_dev_put(idev);
340                         }
341                 }
342         }
343 }
344
345 static bool rt6_check_expired(const struct rt6_info *rt)
346 {
347         if (rt->rt6i_flags & RTF_EXPIRES) {
348                 if (time_after(jiffies, rt->dst.expires))
349                         return true;
350         } else if (rt->dst.from) {
351                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
352         }
353         return false;
354 }
355
356 static bool rt6_need_strict(const struct in6_addr *daddr)
357 {
358         return ipv6_addr_type(daddr) &
359                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
360 }
361
362 /* Multipath route selection:
363  *   Hash based function using packet header and flowlabel.
364  * Adapted from fib_info_hashfn()
365  */
366 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
367                                const struct flowi6 *fl6)
368 {
369         unsigned int val = fl6->flowi6_proto;
370
371         val ^= ipv6_addr_hash(&fl6->daddr);
372         val ^= ipv6_addr_hash(&fl6->saddr);
373
374         /* Work only if this not encapsulated */
375         switch (fl6->flowi6_proto) {
376         case IPPROTO_UDP:
377         case IPPROTO_TCP:
378         case IPPROTO_SCTP:
379                 val ^= (__force u16)fl6->fl6_sport;
380                 val ^= (__force u16)fl6->fl6_dport;
381                 break;
382
383         case IPPROTO_ICMPV6:
384                 val ^= (__force u16)fl6->fl6_icmp_type;
385                 val ^= (__force u16)fl6->fl6_icmp_code;
386                 break;
387         }
388         /* RFC6438 recommands to use flowlabel */
389         val ^= (__force u32)fl6->flowlabel;
390
391         /* Perhaps, we need to tune, this function? */
392         val = val ^ (val >> 7) ^ (val >> 12);
393         return val % candidate_count;
394 }
395
396 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
397                                              struct flowi6 *fl6)
398 {
399         struct rt6_info *sibling, *next_sibling;
400         int route_choosen;
401
402         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
403         /* Don't change the route, if route_choosen == 0
404          * (siblings does not include ourself)
405          */
406         if (route_choosen)
407                 list_for_each_entry_safe(sibling, next_sibling,
408                                 &match->rt6i_siblings, rt6i_siblings) {
409                         route_choosen--;
410                         if (route_choosen == 0) {
411                                 match = sibling;
412                                 break;
413                         }
414                 }
415         return match;
416 }
417
418 /*
419  *      Route lookup. Any table->tb6_lock is implied.
420  */
421
422 static inline struct rt6_info *rt6_device_match(struct net *net,
423                                                     struct rt6_info *rt,
424                                                     const struct in6_addr *saddr,
425                                                     int oif,
426                                                     int flags)
427 {
428         struct rt6_info *local = NULL;
429         struct rt6_info *sprt;
430
431         if (!oif && ipv6_addr_any(saddr))
432                 goto out;
433
434         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
435                 struct net_device *dev = sprt->dst.dev;
436
437                 if (oif) {
438                         if (dev->ifindex == oif)
439                                 return sprt;
440                         if (dev->flags & IFF_LOOPBACK) {
441                                 if (!sprt->rt6i_idev ||
442                                     sprt->rt6i_idev->dev->ifindex != oif) {
443                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
444                                                 continue;
445                                         if (local && (!oif ||
446                                                       local->rt6i_idev->dev->ifindex == oif))
447                                                 continue;
448                                 }
449                                 local = sprt;
450                         }
451                 } else {
452                         if (ipv6_chk_addr(net, saddr, dev,
453                                           flags & RT6_LOOKUP_F_IFACE))
454                                 return sprt;
455                 }
456         }
457
458         if (oif) {
459                 if (local)
460                         return local;
461
462                 if (flags & RT6_LOOKUP_F_IFACE)
463                         return net->ipv6.ip6_null_entry;
464         }
465 out:
466         return rt;
467 }
468
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards the
 * gateway of @rt unless its neighbour entry is in a NUD_VALID state or was
 * updated less than rtr_probe_interval ago.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; neigh->updated is refreshed before each probe for that purpose.
 * Routes without RTF_GATEWAY (and a NULL @rt) are ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct in6_addr *target;
	struct in6_addr mcaddr;
	struct neighbour *neigh;

	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		bool skip;

		write_lock(&neigh->lock);
		skip = (neigh->nud_state & NUD_VALID) ||
		       !time_after(jiffies,
				   neigh->updated +
				   rt->rt6i_idev->cnf.rtr_probe_interval);
		if (!skip)
			neigh->updated = jiffies;
		write_unlock(&neigh->lock);

		if (skip)
			goto unlock;
	}

	/* No neighbour entry, or it is stale enough: solicit the gateway. */
	target = (struct in6_addr *)&rt->rt6i_gateway;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);

unlock:
	rcu_read_unlock_bh();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
515
516 /*
517  * Default Router Selection (RFC 2461 6.3.6)
518  */
519 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
520 {
521         struct net_device *dev = rt->dst.dev;
522         if (!oif || dev->ifindex == oif)
523                 return 2;
524         if ((dev->flags & IFF_LOOPBACK) &&
525             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
526                 return 1;
527         return 0;
528 }
529
530 static inline bool rt6_check_neigh(struct rt6_info *rt)
531 {
532         struct neighbour *neigh;
533         bool ret = false;
534
535         if (rt->rt6i_flags & RTF_NONEXTHOP ||
536             !(rt->rt6i_flags & RTF_GATEWAY))
537                 return true;
538
539         rcu_read_lock_bh();
540         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
541         if (neigh) {
542                 read_lock(&neigh->lock);
543                 if (neigh->nud_state & NUD_VALID)
544                         ret = true;
545 #ifdef CONFIG_IPV6_ROUTER_PREF
546                 else if (!(neigh->nud_state & NUD_FAILED))
547                         ret = true;
548 #endif
549                 read_unlock(&neigh->lock);
550         }
551         rcu_read_unlock_bh();
552
553         return ret;
554 }
555
556 static int rt6_score_route(struct rt6_info *rt, int oif,
557                            int strict)
558 {
559         int m;
560
561         m = rt6_check_dev(rt, oif);
562         if (!m && (strict & RT6_LOOKUP_F_IFACE))
563                 return -1;
564 #ifdef CONFIG_IPV6_ROUTER_PREF
565         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
566 #endif
567         if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
568                 return -1;
569         return m;
570 }
571
572 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
573                                    int *mpri, struct rt6_info *match)
574 {
575         int m;
576
577         if (rt6_check_expired(rt))
578                 goto out;
579
580         m = rt6_score_route(rt, oif, strict);
581         if (m < 0)
582                 goto out;
583
584         if (m > *mpri) {
585                 if (strict & RT6_LOOKUP_F_REACHABLE)
586                         rt6_probe(match);
587                 *mpri = m;
588                 match = rt;
589         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
590                 rt6_probe(rt);
591         }
592
593 out:
594         return match;
595 }
596
597 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
598                                      struct rt6_info *rr_head,
599                                      u32 metric, int oif, int strict)
600 {
601         struct rt6_info *rt, *match;
602         int mpri = -1;
603
604         match = NULL;
605         for (rt = rr_head; rt && rt->rt6i_metric == metric;
606              rt = rt->dst.rt6_next)
607                 match = find_match(rt, oif, strict, &mpri, match);
608         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
609              rt = rt->dst.rt6_next)
610                 match = find_match(rt, oif, strict, &mpri, match);
611
612         return match;
613 }
614
615 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
616 {
617         struct rt6_info *match, *rt0;
618         struct net *net;
619
620         rt0 = fn->rr_ptr;
621         if (!rt0)
622                 fn->rr_ptr = rt0 = fn->leaf;
623
624         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
625
626         if (!match &&
627             (strict & RT6_LOOKUP_F_REACHABLE)) {
628                 struct rt6_info *next = rt0->dst.rt6_next;
629
630                 /* no entries matched; do round-robin */
631                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
632                         next = fn->leaf;
633
634                 if (next != rt0)
635                         fn->rr_ptr = next;
636         }
637
638         net = dev_net(rt0->dst.dev);
639         return match ? match : net->ipv6.ip6_null_entry;
640 }
641
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a route information option (struct route_info) announced by
 * gateway @gwaddr on @dev.
 *
 * @dev:    device the option was received on
 * @opt:    start of the option
 * @len:    number of octets available at @opt
 * @gwaddr: address of the announcing router
 *
 * Depending on the advertised lifetime the matching route is deleted
 * (lifetime 0), created, or has its preference and expiry refreshed.
 *
 * Returns 0 on success and -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/* Compare as signed so that a negative len is rejected as well. */
	if (len < (int)sizeof(struct route_info))
		return -EINVAL;

	/* The buffer must hold as many 8-octet units as Length claims. */
	if (len < (rinfo->length << 3))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length (RFC 4191, section 2.3):
	 * Length counts 8-octet units including the fixed option header,
	 * so it must be 3 when Prefix Length is greater than 64 and at
	 * least 2 when Prefix Length is greater than 0.  Accepting shorter
	 * options would make the prefix copy below read past the option.
	 */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* only prefix_len bits are copied; the rest is zero filled */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
714
/*
 * If the lookup ended on the null entry, climb towards the tree root
 * looking for another node that carries route information.
 *
 * Relies on the caller's locals "rt" and "fn" and on its labels "out"
 * (taken when the top-level root is reached) and "restart" (taken when a
 * node with RTN_RTINFO is found).  When the parent has a source subtree
 * that is not the current node, that subtree is searched for saddr.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
									\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
732
733 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
734                                              struct fib6_table *table,
735                                              struct flowi6 *fl6, int flags)
736 {
737         struct fib6_node *fn;
738         struct rt6_info *rt;
739
740         read_lock_bh(&table->tb6_lock);
741         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
742 restart:
743         rt = fn->leaf;
744         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
745         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
746                 rt = rt6_multipath_select(rt, fl6);
747         BACKTRACK(net, &fl6->saddr);
748 out:
749         dst_use(&rt->dst, jiffies);
750         read_unlock_bh(&table->tb6_lock);
751         return rt;
752
753 }
754
755 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
756                                     int flags)
757 {
758         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
759 }
760 EXPORT_SYMBOL_GPL(ip6_route_lookup);
761
762 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
763                             const struct in6_addr *saddr, int oif, int strict)
764 {
765         struct flowi6 fl6 = {
766                 .flowi6_oif = oif,
767                 .daddr = *daddr,
768         };
769         struct dst_entry *dst;
770         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
771
772         if (saddr) {
773                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
774                 flags |= RT6_LOOKUP_F_HAS_SADDR;
775         }
776
777         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
778         if (dst->error == 0)
779                 return (struct rt6_info *) dst;
780
781         dst_release(dst);
782
783         return NULL;
784 }
785
786 EXPORT_SYMBOL(rt6_lookup);
787
788 /* ip6_ins_rt is called with FREE table->tb6_lock.
789    It takes new route entry, the addition fails by any reason the
790    route is freed. In any case, if caller does not hold it, it may
791    be destroyed.
792  */
793
794 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
795 {
796         int err;
797         struct fib6_table *table;
798
799         table = rt->rt6i_table;
800         write_lock_bh(&table->tb6_lock);
801         err = fib6_add(&table->tb6_root, rt, info);
802         write_unlock_bh(&table->tb6_lock);
803
804         return err;
805 }
806
807 int ip6_ins_rt(struct rt6_info *rt)
808 {
809         struct nl_info info = {
810                 .nl_net = dev_net(rt->dst.dev),
811         };
812         return __ip6_ins_rt(rt, &info);
813 }
814
815 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
816                                       const struct in6_addr *daddr,
817                                       const struct in6_addr *saddr)
818 {
819         struct rt6_info *rt;
820
821         /*
822          *      Clone the route.
823          */
824
825         rt = ip6_rt_copy(ort, daddr);
826
827         if (rt) {
828                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
829                         if (ort->rt6i_dst.plen != 128 &&
830                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
831                                 rt->rt6i_flags |= RTF_ANYCAST;
832                         rt->rt6i_gateway = *daddr;
833                 }
834
835                 rt->rt6i_flags |= RTF_CACHE;
836
837 #ifdef CONFIG_IPV6_SUBTREES
838                 if (rt->rt6i_src.plen && saddr) {
839                         rt->rt6i_src.addr = *saddr;
840                         rt->rt6i_src.plen = 128;
841                 }
842 #endif
843         }
844
845         return rt;
846 }
847
848 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
849                                         const struct in6_addr *daddr)
850 {
851         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
852
853         if (rt)
854                 rt->rt6i_flags |= RTF_CACHE;
855         return rt;
856 }
857
/*
 * ip6_pol_route - pick a route for @fl6 out of @table.
 *
 * @net:   namespace; supplies the null entry and the forwarding setting
 * @table: FIB table searched under table->tb6_lock (read side)
 * @oif:   interface index handed to rt6_select(); 0 additionally allows
 *         rt6_multipath_select() to pick among sibling routes
 * @fl6:   flow whose daddr/saddr drive the lookup
 * @flags: only RT6_LOOKUP_F_IFACE is consumed here
 *
 * Returns a route with a reference taken via dst_hold() on every path;
 * this is net->ipv6.ip6_null_entry when nothing usable was found or a
 * copy could not be allocated.  lastuse/__use of the returned entry are
 * updated before returning.
 *
 * A selected route that is neither the null entry nor an RTF_CACHE entry
 * is copied (rt6_alloc_cow() or rt6_alloc_clone()) and the copy is
 * inserted with ip6_ins_rt(); because the table lock is dropped for that,
 * the whole lookup is retried a bounded number of times when the insert
 * fails.
 */
858 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
859                                       struct flowi6 *fl6, int flags)
860 {
861         struct fib6_node *fn;
862         struct rt6_info *rt, *nrt;
863         int strict = 0;
            /* Upper bound on lookup+insert rounds (see the retry path below). */
864         int attempts = 3;
865         int err;
            /* With forwarding disabled the first pass asks for RT6_LOOKUP_F_REACHABLE. */
866         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
867
            /* RT6_LOOKUP_F_IFACE is the only caller flag forwarded to rt6_select(). */
868         strict |= flags & RT6_LOOKUP_F_IFACE;
869
870 relookup:
871         read_lock_bh(&table->tb6_lock);
872
873 restart_2:
874         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
875
876 restart:
877         rt = rt6_select(fn, oif, strict | reachable);
878         if (rt->rt6i_nsiblings && oif == 0)
879                 rt = rt6_multipath_select(rt, fl6);
            /*
             * NOTE(review): BACKTRACK is a macro defined outside this excerpt;
             * it presumably uses the locals rt/fn and jumps to the labels
             * "restart"/"out" -- confirm before renaming any of them.
             */
880         BACKTRACK(net, &fl6->saddr);
            /* Null entry or an existing cache entry: nothing to copy. */
881         if (rt == net->ipv6.ip6_null_entry ||
882             rt->rt6i_flags & RTF_CACHE)
883                 goto out;
884
            /* Pin rt so it stays valid once the table lock is released. */
885         dst_hold(&rt->dst);
886         read_unlock_bh(&table->tb6_lock);
887
            /*
             * Neither RTF_NONEXTHOP nor RTF_GATEWAY: copy via rt6_alloc_cow().
             * Otherwise, a non-DST_HOST route is copied via rt6_alloc_clone()
             * and a DST_HOST route is returned as it is.
             */
888         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
889                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
890         else if (!(rt->dst.flags & DST_HOST))
891                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
892         else
893                 goto out2;
894
            /* Swap the original for the copy; fall back to the null entry on allocation failure. */
895         ip6_rt_put(rt);
896         rt = nrt ? : net->ipv6.ip6_null_entry;
897
898         dst_hold(&rt->dst);
899         if (nrt) {
900                 err = ip6_ins_rt(nrt);
901                 if (!err)
902                         goto out2;
903         }
904
            /* Out of retries: return whatever rt currently is. */
905         if (--attempts <= 0)
906                 goto out2;
907
908         /*
909          * Race condition! In the gap, when table->tb6_lock was
910          * released someone could insert this route.  Relookup.
911          */
912         ip6_rt_put(rt);
913         goto relookup;
914
915 out:
            /*
             * Reached with the table lock still held.  If the pass that got
             * here still had the reachability flag set, clear it and redo the
             * lookup once without it.
             */
916         if (reachable) {
917                 reachable = 0;
918                 goto restart_2;
919         }
920         dst_hold(&rt->dst);
921         read_unlock_bh(&table->tb6_lock);
922 out2:
923         rt->dst.lastuse = jiffies;
924         rt->dst.__use++;
925
926         return rt;
927 }
928
929 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
930                                             struct flowi6 *fl6, int flags)
931 {
932         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
933 }
934
935 static struct dst_entry *ip6_route_input_lookup(struct net *net,
936                                                 struct net_device *dev,
937                                                 struct flowi6 *fl6, int flags)
938 {
939         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
940                 flags |= RT6_LOOKUP_F_IFACE;
941
942         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
943 }
944
945 void ip6_route_input(struct sk_buff *skb)
946 {
947         const struct ipv6hdr *iph = ipv6_hdr(skb);
948         struct net *net = dev_net(skb->dev);
949         int flags = RT6_LOOKUP_F_HAS_SADDR;
950         struct flowi6 fl6 = {
951                 .flowi6_iif = skb->dev->ifindex,
952                 .daddr = iph->daddr,
953                 .saddr = iph->saddr,
954                 .flowlabel = ip6_flowinfo(iph),
955                 .flowi6_mark = skb->mark,
956                 .flowi6_proto = iph->nexthdr,
957         };
958
959         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
960 }
961
962 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
963                                              struct flowi6 *fl6, int flags)
964 {
965         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
966 }
967
968 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
969                                     struct flowi6 *fl6)
970 {
971         int flags = 0;
972
973         fl6->flowi6_iif = LOOPBACK_IFINDEX;
974
975         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
976                 flags |= RT6_LOOKUP_F_IFACE;
977
978         if (!ipv6_addr_any(&fl6->saddr))
979                 flags |= RT6_LOOKUP_F_HAS_SADDR;
980         else if (sk)
981                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
982
983         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
984 }
985
986 EXPORT_SYMBOL(ip6_route_output);
987
988 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
989 {
990         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
991         struct dst_entry *new = NULL;
992
993         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
994         if (rt) {
995                 new = &rt->dst;
996
997                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
998                 rt6_init_peer(rt, net->ipv6.peers);
999
1000                 new->__use = 1;
1001                 new->input = dst_discard;
1002                 new->output = dst_discard;
1003
1004                 if (dst_metrics_read_only(&ort->dst))
1005                         new->_metrics = ort->dst._metrics;
1006                 else
1007                         dst_copy_metrics(new, &ort->dst);
1008                 rt->rt6i_idev = ort->rt6i_idev;
1009                 if (rt->rt6i_idev)
1010                         in6_dev_hold(rt->rt6i_idev);
1011
1012                 rt->rt6i_gateway = ort->rt6i_gateway;
1013                 rt->rt6i_flags = ort->rt6i_flags;
1014                 rt->rt6i_metric = 0;
1015
1016                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1017 #ifdef CONFIG_IPV6_SUBTREES
1018                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1019 #endif
1020
1021                 dst_free(new);
1022         }
1023
1024         dst_release(dst_orig);
1025         return new ? new : ERR_PTR(-ENOMEM);
1026 }
1027
1028 /*
1029  *      Destination cache support functions
1030  */
1031
1032 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1033 {
1034         struct rt6_info *rt;
1035
1036         rt = (struct rt6_info *) dst;
1037
1038         /* All IPV6 dsts are created with ->obsolete set to the value
1039          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1040          * into this function always.
1041          */
1042         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1043                 return NULL;
1044
1045         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1046                 return dst;
1047
1048         return NULL;
1049 }
1050
1051 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1052 {
1053         struct rt6_info *rt = (struct rt6_info *) dst;
1054
1055         if (rt) {
1056                 if (rt->rt6i_flags & RTF_CACHE) {
1057                         if (rt6_check_expired(rt)) {
1058                                 ip6_del_rt(rt);
1059                                 dst = NULL;
1060                         }
1061                 } else {
1062                         dst_release(dst);
1063                         dst = NULL;
1064                 }
1065         }
1066         return dst;
1067 }
1068
1069 static void ip6_link_failure(struct sk_buff *skb)
1070 {
1071         struct rt6_info *rt;
1072
1073         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1074
1075         rt = (struct rt6_info *) skb_dst(skb);
1076         if (rt) {
1077                 if (rt->rt6i_flags & RTF_CACHE)
1078                         rt6_update_expires(rt, 0);
1079                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1080                         rt->rt6i_node->fn_sernum = -1;
1081         }
1082 }
1083
1084 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1085                                struct sk_buff *skb, u32 mtu)
1086 {
1087         struct rt6_info *rt6 = (struct rt6_info*)dst;
1088
1089         dst_confirm(dst);
1090         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1091                 struct net *net = dev_net(dst->dev);
1092
1093                 rt6->rt6i_flags |= RTF_MODIFIED;
1094                 if (mtu < IPV6_MIN_MTU) {
1095                         u32 features = dst_metric(dst, RTAX_FEATURES);
1096                         mtu = IPV6_MIN_MTU;
1097                         features |= RTAX_FEATURE_ALLFRAG;
1098                         dst_metric_set(dst, RTAX_FEATURES, features);
1099                 }
1100                 dst_metric_set(dst, RTAX_MTU, mtu);
1101                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1102         }
1103 }
1104
1105 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1106                      int oif, u32 mark)
1107 {
1108         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1109         struct dst_entry *dst;
1110         struct flowi6 fl6;
1111
1112         memset(&fl6, 0, sizeof(fl6));
1113         fl6.flowi6_oif = oif;
1114         fl6.flowi6_mark = mark;
1115         fl6.flowi6_flags = 0;
1116         fl6.daddr = iph->daddr;
1117         fl6.saddr = iph->saddr;
1118         fl6.flowlabel = ip6_flowinfo(iph);
1119
1120         dst = ip6_route_output(net, NULL, &fl6);
1121         if (!dst->error)
1122                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1123         dst_release(dst);
1124 }
1125 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1126
1127 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1128 {
1129         ip6_update_pmtu(skb, sock_net(sk), mtu,
1130                         sk->sk_bound_dev_if, sk->sk_mark);
1131 }
1132 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1133
1134 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1135 {
1136         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1137         struct dst_entry *dst;
1138         struct flowi6 fl6;
1139
1140         memset(&fl6, 0, sizeof(fl6));
1141         fl6.flowi6_oif = oif;
1142         fl6.flowi6_mark = mark;
1143         fl6.flowi6_flags = 0;
1144         fl6.daddr = iph->daddr;
1145         fl6.saddr = iph->saddr;
1146         fl6.flowlabel = ip6_flowinfo(iph);
1147
1148         dst = ip6_route_output(net, NULL, &fl6);
1149         if (!dst->error)
1150                 rt6_do_redirect(dst, NULL, skb);
1151         dst_release(dst);
1152 }
1153 EXPORT_SYMBOL_GPL(ip6_redirect);
1154
1155 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1156 {
1157         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1158 }
1159 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1160
1161 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1162 {
1163         struct net_device *dev = dst->dev;
1164         unsigned int mtu = dst_mtu(dst);
1165         struct net *net = dev_net(dev);
1166
1167         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1168
1169         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1170                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1171
1172         /*
1173          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1174          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1175          * IPV6_MAXPLEN is also valid and means: "any MSS,
1176          * rely only on pmtu discovery"
1177          */
1178         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1179                 mtu = IPV6_MAXPLEN;
1180         return mtu;
1181 }
1182
1183 static unsigned int ip6_mtu(const struct dst_entry *dst)
1184 {
1185         struct inet6_dev *idev;
1186         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1187
1188         if (mtu)
1189                 return mtu;
1190
1191         mtu = IPV6_MIN_MTU;
1192
1193         rcu_read_lock();
1194         idev = __in6_dev_get(dst->dev);
1195         if (idev)
1196                 mtu = idev->cnf.mtu6;
1197         rcu_read_unlock();
1198
1199         return mtu;
1200 }
1201
1202 static struct dst_entry *icmp6_dst_gc_list;
1203 static DEFINE_SPINLOCK(icmp6_dst_lock);
1204
1205 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1206                                   struct flowi6 *fl6)
1207 {
1208         struct dst_entry *dst;
1209         struct rt6_info *rt;
1210         struct inet6_dev *idev = in6_dev_get(dev);
1211         struct net *net = dev_net(dev);
1212
1213         if (unlikely(!idev))
1214                 return ERR_PTR(-ENODEV);
1215
1216         rt = ip6_dst_alloc(net, dev, 0, NULL);
1217         if (unlikely(!rt)) {
1218                 in6_dev_put(idev);
1219                 dst = ERR_PTR(-ENOMEM);
1220                 goto out;
1221         }
1222
1223         rt->dst.flags |= DST_HOST;
1224         rt->dst.output  = ip6_output;
1225         atomic_set(&rt->dst.__refcnt, 1);
1226         rt->rt6i_dst.addr = fl6->daddr;
1227         rt->rt6i_dst.plen = 128;
1228         rt->rt6i_idev     = idev;
1229         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1230
1231         spin_lock_bh(&icmp6_dst_lock);
1232         rt->dst.next = icmp6_dst_gc_list;
1233         icmp6_dst_gc_list = &rt->dst;
1234         spin_unlock_bh(&icmp6_dst_lock);
1235
1236         fib6_force_start_gc(net);
1237
1238         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1239
1240 out:
1241         return dst;
1242 }
1243
1244 int icmp6_dst_gc(void)
1245 {
1246         struct dst_entry *dst, **pprev;
1247         int more = 0;
1248
1249         spin_lock_bh(&icmp6_dst_lock);
1250         pprev = &icmp6_dst_gc_list;
1251
1252         while ((dst = *pprev) != NULL) {
1253                 if (!atomic_read(&dst->__refcnt)) {
1254                         *pprev = dst->next;
1255                         dst_free(dst);
1256                 } else {
1257                         pprev = &dst->next;
1258                         ++more;
1259                 }
1260         }
1261
1262         spin_unlock_bh(&icmp6_dst_lock);
1263
1264         return more;
1265 }
1266
1267 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1268                             void *arg)
1269 {
1270         struct dst_entry *dst, **pprev;
1271
1272         spin_lock_bh(&icmp6_dst_lock);
1273         pprev = &icmp6_dst_gc_list;
1274         while ((dst = *pprev) != NULL) {
1275                 struct rt6_info *rt = (struct rt6_info *) dst;
1276                 if (func(rt, arg)) {
1277                         *pprev = dst->next;
1278                         dst_free(dst);
1279                 } else {
1280                         pprev = &dst->next;
1281                 }
1282         }
1283         spin_unlock_bh(&icmp6_dst_lock);
1284 }
1285
1286 static int ip6_dst_gc(struct dst_ops *ops)
1287 {
1288         unsigned long now = jiffies;
1289         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1290         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1291         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1292         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1293         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1294         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1295         int entries;
1296
1297         entries = dst_entries_get_fast(ops);
1298         if (time_after(rt_last_gc + rt_min_interval, now) &&
1299             entries <= rt_max_size)
1300                 goto out;
1301
1302         net->ipv6.ip6_rt_gc_expire++;
1303         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1304         net->ipv6.ip6_rt_last_gc = now;
1305         entries = dst_entries_get_slow(ops);
1306         if (entries < ops->gc_thresh)
1307                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1308 out:
1309         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1310         return entries > rt_max_size;
1311 }
1312
1313 int ip6_dst_hoplimit(struct dst_entry *dst)
1314 {
1315         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1316         if (hoplimit == 0) {
1317                 struct net_device *dev = dst->dev;
1318                 struct inet6_dev *idev;
1319
1320                 rcu_read_lock();
1321                 idev = __in6_dev_get(dev);
1322                 if (idev)
1323                         hoplimit = idev->cnf.hop_limit;
1324                 else
1325                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1326                 rcu_read_unlock();
1327         }
1328         return hoplimit;
1329 }
1330 EXPORT_SYMBOL(ip6_dst_hoplimit);
1331
1332 /*
1333  *
1334  */
1335
/*
 * ip6_route_add - build a route from @cfg and insert it into its table.
 *
 * @cfg: route description; note that fc_metric, fc_protocol and
 *       fc_nlinfo.nl_net may be rewritten by this function.
 *
 * Returns the result of __ip6_ins_rt() once the route has been fully
 * constructed, or a negative errno set on one of the validation paths
 * below.  Every failure before the insert goes through "out", which
 * drops the dev/idev references taken here and frees the half-built
 * route.  On the insert path dev and idev are stored in the route and are
 * not released by this function.
 */
1336 int ip6_route_add(struct fib6_config *cfg)
1337 {
1338         int err;
1339         struct net *net = cfg->fc_nlinfo.nl_net;
1340         struct rt6_info *rt = NULL;
1341         struct net_device *dev = NULL;
1342         struct inet6_dev *idev = NULL;
1343         struct fib6_table *table;
1344         int addr_type;
1345
             /* Prefix lengths are bit counts of an IPv6 address. */
1346         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1347                 return -EINVAL;
1348 #ifndef CONFIG_IPV6_SUBTREES
             /* A source prefix is rejected when subtrees are not compiled in. */
1349         if (cfg->fc_src_len)
1350                 return -EINVAL;
1351 #endif
             /* An explicit interface index pins both the device and its inet6_dev. */
1352         if (cfg->fc_ifindex) {
1353                 err = -ENODEV;
1354                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1355                 if (!dev)
1356                         goto out;
1357                 idev = in6_dev_get(dev);
1358                 if (!idev)
1359                         goto out;
1360         }
1361
             /* Metric 0 means "unspecified"; substitute IP6_RT_PRIO_USER. */
1362         if (cfg->fc_metric == 0)
1363                 cfg->fc_metric = IP6_RT_PRIO_USER;
1364
             /*
              * Resolve the table.  For a netlink request without NLM_F_CREATE an
              * existing table is looked up first; if there is none a warning is
              * logged and the table is created anyway.
              */
1365         err = -ENOBUFS;
1366         if (cfg->fc_nlinfo.nlh &&
1367             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1368                 table = fib6_get_table(net, cfg->fc_table);
1369                 if (!table) {
1370                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1371                         table = fib6_new_table(net, cfg->fc_table);
1372                 }
1373         } else {
1374                 table = fib6_new_table(net, cfg->fc_table);
1375         }
1376
1377         if (!table)
1378                 goto out;
1379
1380         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1381
1382         if (!rt) {
1383                 err = -ENOMEM;
1384                 goto out;
1385         }
1386
             /* fc_expires is given in clock_t units and converted to jiffies. */
1387         if (cfg->fc_flags & RTF_EXPIRES)
1388                 rt6_set_expires(rt, jiffies +
1389                                 clock_t_to_jiffies(cfg->fc_expires));
1390         else
1391                 rt6_clean_expires(rt);
1392
1393         if (cfg->fc_protocol == RTPROT_UNSPEC)
1394                 cfg->fc_protocol = RTPROT_BOOT;
1395         rt->rt6i_protocol = cfg->fc_protocol;
1396
1397         addr_type = ipv6_addr_type(&cfg->fc_dst);
1398
             /* Input handler: multicast, local delivery, or forwarding. */
1399         if (addr_type & IPV6_ADDR_MULTICAST)
1400                 rt->dst.input = ip6_mc_input;
1401         else if (cfg->fc_flags & RTF_LOCAL)
1402                 rt->dst.input = ip6_input;
1403         else
1404                 rt->dst.input = ip6_forward;
1405
1406         rt->dst.output = ip6_output;
1407
             /* Store the destination masked to its prefix length; /128 makes it a host route. */
1408         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1409         rt->rt6i_dst.plen = cfg->fc_dst_len;
1410         if (rt->rt6i_dst.plen == 128)
1411                rt->dst.flags |= DST_HOST;
1412
             /* Non-host routes that carry metric attributes get a private, zeroed metrics array. */
1413         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1414                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1415                 if (!metrics) {
1416                         err = -ENOMEM;
1417                         goto out;
1418                 }
1419                 dst_init_metrics(&rt->dst, metrics, 0);
1420         }
1421 #ifdef CONFIG_IPV6_SUBTREES
1422         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1423         rt->rt6i_src.plen = cfg->fc_src_len;
1424 #endif
1425
1426         rt->rt6i_metric = cfg->fc_metric;
1427
1428         /* We cannot add true routes via loopback here,
1429            they would result in kernel looping; promote them to reject routes
1430          */
1431         if ((cfg->fc_flags & RTF_REJECT) ||
1432             (dev && (dev->flags & IFF_LOOPBACK) &&
1433              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1434              !(cfg->fc_flags & RTF_LOCAL))) {
1435                 /* hold loopback dev/idev if we haven't done so. */
1436                 if (dev != net->loopback_dev) {
1437                         if (dev) {
1438                                 dev_put(dev);
1439                                 in6_dev_put(idev);
1440                         }
1441                         dev = net->loopback_dev;
1442                         dev_hold(dev);
1443                         idev = in6_dev_get(dev);
1444                         if (!idev) {
1445                                 err = -ENODEV;
1446                                 goto out;
1447                         }
1448                 }
1449                 rt->dst.output = ip6_pkt_discard_out;
1450                 rt->dst.input = ip6_pkt_discard;
1451                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                     /* The route type selects the error stored in the dst. */
1452                 switch (cfg->fc_type) {
1453                 case RTN_BLACKHOLE:
1454                         rt->dst.error = -EINVAL;
1455                         break;
1456                 case RTN_PROHIBIT:
1457                         rt->dst.error = -EACCES;
1458                         break;
1459                 case RTN_THROW:
1460                         rt->dst.error = -EAGAIN;
1461                         break;
1462                 default:
1463                         rt->dst.error = -ENETUNREACH;
1464                         break;
1465                 }
                     /* Reject routes skip the gateway and prefsrc handling below. */
1466                 goto install_route;
1467         }
1468
1469         if (cfg->fc_flags & RTF_GATEWAY) {
1470                 const struct in6_addr *gw_addr;
1471                 int gwa_type;
1472
1473                 gw_addr = &cfg->fc_gateway;
1474                 rt->rt6i_gateway = *gw_addr;
1475                 gwa_type = ipv6_addr_type(gw_addr);
1476
1477                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1478                         struct rt6_info *grt;
1479
1480                         /* IPv6 strictly inhibits using non-link-local
1481                            addresses as the nexthop address.
1482                            Otherwise, the router will not be able to send redirects.
1483                            It is very good, but in some (rare!) circumstances
1484                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1485                            some exceptions. --ANK
1486                          */
1487                         err = -EINVAL;
1488                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1489                                 goto out;
1490
                             /* The gateway itself must be resolvable by a route lookup. */
1491                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1492
1493                         err = -EHOSTUNREACH;
1494                         if (!grt)
1495                                 goto out;
1496                         if (dev) {
                                     /* A requested device must match the one the gateway route uses. */
1497                                 if (dev != grt->dst.dev) {
1498                                         ip6_rt_put(grt);
1499                                         goto out;
1500                                 }
1501                         } else {
                                     /* No device requested: adopt (and pin) the gateway route's. */
1502                                 dev = grt->dst.dev;
1503                                 idev = grt->rt6i_idev;
1504                                 dev_hold(dev);
1505                                 in6_dev_hold(grt->rt6i_idev);
1506                         }
                             /* err stays -EHOSTUNREACH when the gateway route is itself a gateway route. */
1507                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1508                                 err = 0;
1509                         ip6_rt_put(grt);
1510
1511                         if (err)
1512                                 goto out;
1513                 }
                     /* Gateway routes need a non-loopback device. */
1514                 err = -EINVAL;
1515                 if (!dev || (dev->flags & IFF_LOOPBACK))
1516                         goto out;
1517         }
1518
1519         err = -ENODEV;
1520         if (!dev)
1521                 goto out;
1522
             /* A preferred source address is accepted only if ipv6_chk_addr() passes for it on dev. */
1523         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1524                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1525                         err = -EINVAL;
1526                         goto out;
1527                 }
1528                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1529                 rt->rt6i_prefsrc.plen = 128;
1530         } else
1531                 rt->rt6i_prefsrc.plen = 0;
1532
1533         rt->rt6i_flags = cfg->fc_flags;
1534
1535 install_route:
             /* Apply the netlink metric attributes; type 0 is skipped, types above RTAX_MAX are an error. */
1536         if (cfg->fc_mx) {
1537                 struct nlattr *nla;
1538                 int remaining;
1539
1540                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1541                         int type = nla_type(nla);
1542
1543                         if (type) {
1544                                 if (type > RTAX_MAX) {
1545                                         err = -EINVAL;
1546                                         goto out;
1547                                 }
1548
1549                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1550                         }
1551                 }
1552         }
1553
1554         rt->dst.dev = dev;
1555         rt->rt6i_idev = idev;
1556         rt->rt6i_table = table;
1557
             /* The insert is performed in the namespace of the final device. */
1558         cfg->fc_nlinfo.nl_net = dev_net(dev);
1559
1560         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1561
1562 out:
             /* Common failure exit: undo whatever was acquired so far. */
1563         if (dev)
1564                 dev_put(dev);
1565         if (idev)
1566                 in6_dev_put(idev);
1567         if (rt)
1568                 dst_free(&rt->dst);
1569         return err;
1570 }
1571
1572 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1573 {
1574         int err;
1575         struct fib6_table *table;
1576         struct net *net = dev_net(rt->dst.dev);
1577
1578         if (rt == net->ipv6.ip6_null_entry) {
1579                 err = -ENOENT;
1580                 goto out;
1581         }
1582
1583         table = rt->rt6i_table;
1584         write_lock_bh(&table->tb6_lock);
1585         err = fib6_del(rt, info);
1586         write_unlock_bh(&table->tb6_lock);
1587
1588 out:
1589         ip6_rt_put(rt);
1590         return err;
1591 }
1592
1593 int ip6_del_rt(struct rt6_info *rt)
1594 {
1595         struct nl_info info = {
1596                 .nl_net = dev_net(rt->dst.dev),
1597         };
1598         return __ip6_del_rt(rt, &info);
1599 }
1600
1601 static int ip6_route_del(struct fib6_config *cfg)
1602 {
1603         struct fib6_table *table;
1604         struct fib6_node *fn;
1605         struct rt6_info *rt;
1606         int err = -ESRCH;
1607
1608         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1609         if (!table)
1610                 return err;
1611
1612         read_lock_bh(&table->tb6_lock);
1613
1614         fn = fib6_locate(&table->tb6_root,
1615                          &cfg->fc_dst, cfg->fc_dst_len,
1616                          &cfg->fc_src, cfg->fc_src_len);
1617
1618         if (fn) {
1619                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1620                         if (cfg->fc_ifindex &&
1621                             (!rt->dst.dev ||
1622                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1623                                 continue;
1624                         if (cfg->fc_flags & RTF_GATEWAY &&
1625                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1626                                 continue;
1627                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1628                                 continue;
1629                         dst_hold(&rt->dst);
1630                         read_unlock_bh(&table->tb6_lock);
1631
1632                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1633                 }
1634         }
1635         read_unlock_bh(&table->tb6_lock);
1636
1637         return err;
1638 }
1639
/*
 * rt6_do_redirect - act on a received ICMPv6 Redirect.
 *
 * @dst: route currently used towards the redirected destination
 * @sk:  not used in this function
 * @skb: the Redirect message; icmp6_hdr(skb) is interpreted as struct rd_msg
 *
 * The message is validated (length, non-multicast destination, target
 * either equal to the destination or a link-local unicast address, ND
 * options, receiving interface not forwarding and accepting redirects).
 * On acceptance the neighbour entry for the target is updated, and a /128
 * RTF_DYNAMIC|RTF_CACHE copy of the route whose gateway is the target is
 * inserted.  Redirect netevent notifiers are called, and the old route is
 * deleted if it was itself a cache entry.
 *
 * All failures are silent apart from rate-limited debug messages.
 * NOTE(review): some of those messages are prefixed "rt6_redirect:" and
 * others "rt6_do_redirect:"; they are left untouched here.
 */
1640 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1641 {
1642         struct net *net = dev_net(skb->dev);
1643         struct netevent_redirect netevent;
1644         struct rt6_info *rt, *nrt = NULL;
1645         struct ndisc_options ndopts;
1646         struct inet6_dev *in6_dev;
1647         struct neighbour *neigh;
1648         struct rd_msg *msg;
1649         int optlen, on_link;
1650         u8 *lladdr;
1651
             /* Bytes left for ND options after the fixed part of the Redirect message. */
1652         optlen = skb->tail - skb->transport_header;
1653         optlen -= sizeof(*msg);
1654
1655         if (optlen < 0) {
1656                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1657                 return;
1658         }
1659
1660         msg = (struct rd_msg *)icmp6_hdr(skb);
1661
1662         if (ipv6_addr_is_multicast(&msg->dest)) {
1663                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1664                 return;
1665         }
1666
             /* target == destination: the destination is treated as on-link (no gateway). */
1667         on_link = 0;
1668         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1669                 on_link = 1;
1670         } else if (ipv6_addr_type(&msg->target) !=
1671                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1672                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1673                 return;
1674         }
1675
             /* Ignore redirects on interfaces that forward or have accept_redirects off. */
1676         in6_dev = __in6_dev_get(skb->dev);
1677         if (!in6_dev)
1678                 return;
1679         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1680                 return;
1681
1682         /* RFC2461 8.1:
1683          *      The IP source address of the Redirect MUST be the same as the current
1684          *      first-hop router for the specified ICMP Destination Address.
1685          */
1686
1687         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1688                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1689                 return;
1690         }
1691
             /* The target link-layer address option is optional; lladdr stays NULL without it. */
1692         lladdr = NULL;
1693         if (ndopts.nd_opts_tgt_lladdr) {
1694                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1695                                              skb->dev);
1696                 if (!lladdr) {
1697                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1698                         return;
1699                 }
1700         }
1701
1702         rt = (struct rt6_info *) dst;
1703         if (rt == net->ipv6.ip6_null_entry) {
1704                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1705                 return;
1706         }
1707
1708         /* Redirect received -> path was valid.
1709          * Look, redirects are sent only in response to data packets,
1710          * so that this nexthop apparently is reachable. --ANK
1711          */
1712         dst_confirm(&rt->dst);
1713
             /* Look up the neighbour entry for the target (last argument 1: create if missing -- TODO confirm). */
1714         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1715         if (!neigh)
1716                 return;
1717
1718         /*
1719          *      We have finally decided to accept it.
1720          */
1721
             /* The router flags are only forced when the target is a router, i.e. not on-link. */
1722         neigh_update(neigh, lladdr, NUD_STALE,
1723                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1724                      NEIGH_UPDATE_F_OVERRIDE|
1725                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1726                                      NEIGH_UPDATE_F_ISROUTER))
1727                      );
1728
             /* Build the replacement /128 route for the redirected destination. */
1729         nrt = ip6_rt_copy(rt, &msg->dest);
1730         if (!nrt)
1731                 goto out;
1732
1733         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1734         if (on_link)
1735                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1736
1737         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1738
1739         if (ip6_ins_rt(nrt))
1740                 goto out;
1741
1742         netevent.old = &rt->dst;
1743         netevent.new = &nrt->dst;
1744         netevent.daddr = &msg->dest;
1745         netevent.neigh = neigh;
1746         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1747
             /* An old cache entry is superseded; the extra reference taken here is for ip6_del_rt(). */
1748         if (rt->rt6i_flags & RTF_CACHE) {
1749                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1750                 ip6_del_rt(rt);
1751         }
1752
1753 out:
1754         neigh_release(neigh);
1755 }
1756
1757 /*
1758  *      Misc support functions
1759  */
1760
1761 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1762                                     const struct in6_addr *dest)
1763 {
1764         struct net *net = dev_net(ort->dst.dev);
1765         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1766                                             ort->rt6i_table);
1767
1768         if (rt) {
1769                 rt->dst.input = ort->dst.input;
1770                 rt->dst.output = ort->dst.output;
1771                 rt->dst.flags |= DST_HOST;
1772
1773                 rt->rt6i_dst.addr = *dest;
1774                 rt->rt6i_dst.plen = 128;
1775                 dst_copy_metrics(&rt->dst, &ort->dst);
1776                 rt->dst.error = ort->dst.error;
1777                 rt->rt6i_idev = ort->rt6i_idev;
1778                 if (rt->rt6i_idev)
1779                         in6_dev_hold(rt->rt6i_idev);
1780                 rt->dst.lastuse = jiffies;
1781
1782                 rt->rt6i_gateway = ort->rt6i_gateway;
1783                 rt->rt6i_flags = ort->rt6i_flags;
1784                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1785                     (RTF_DEFAULT | RTF_ADDRCONF))
1786                         rt6_set_from(rt, ort);
1787                 rt->rt6i_metric = 0;
1788
1789 #ifdef CONFIG_IPV6_SUBTREES
1790                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1791 #endif
1792                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1793                 rt->rt6i_table = ort->rt6i_table;
1794         }
1795         return rt;
1796 }
1797
1798 #ifdef CONFIG_IPV6_ROUTE_INFO
1799 static struct rt6_info *rt6_get_route_info(struct net *net,
1800                                            const struct in6_addr *prefix, int prefixlen,
1801                                            const struct in6_addr *gwaddr, int ifindex)
1802 {
1803         struct fib6_node *fn;
1804         struct rt6_info *rt = NULL;
1805         struct fib6_table *table;
1806
1807         table = fib6_get_table(net, RT6_TABLE_INFO);
1808         if (!table)
1809                 return NULL;
1810
1811         read_lock_bh(&table->tb6_lock);
1812         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1813         if (!fn)
1814                 goto out;
1815
1816         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1817                 if (rt->dst.dev->ifindex != ifindex)
1818                         continue;
1819                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1820                         continue;
1821                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1822                         continue;
1823                 dst_hold(&rt->dst);
1824                 break;
1825         }
1826 out:
1827         read_unlock_bh(&table->tb6_lock);
1828         return rt;
1829 }
1830
1831 static struct rt6_info *rt6_add_route_info(struct net *net,
1832                                            const struct in6_addr *prefix, int prefixlen,
1833                                            const struct in6_addr *gwaddr, int ifindex,
1834                                            unsigned int pref)
1835 {
1836         struct fib6_config cfg = {
1837                 .fc_table       = RT6_TABLE_INFO,
1838                 .fc_metric      = IP6_RT_PRIO_USER,
1839                 .fc_ifindex     = ifindex,
1840                 .fc_dst_len     = prefixlen,
1841                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1842                                   RTF_UP | RTF_PREF(pref),
1843                 .fc_nlinfo.portid = 0,
1844                 .fc_nlinfo.nlh = NULL,
1845                 .fc_nlinfo.nl_net = net,
1846         };
1847
1848         cfg.fc_dst = *prefix;
1849         cfg.fc_gateway = *gwaddr;
1850
1851         /* We should treat it as a default route if prefix length is 0. */
1852         if (!prefixlen)
1853                 cfg.fc_flags |= RTF_DEFAULT;
1854
1855         ip6_route_add(&cfg);
1856
1857         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1858 }
1859 #endif
1860
1861 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1862 {
1863         struct rt6_info *rt;
1864         struct fib6_table *table;
1865
1866         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1867         if (!table)
1868                 return NULL;
1869
1870         read_lock_bh(&table->tb6_lock);
1871         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1872                 if (dev == rt->dst.dev &&
1873                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1874                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1875                         break;
1876         }
1877         if (rt)
1878                 dst_hold(&rt->dst);
1879         read_unlock_bh(&table->tb6_lock);
1880         return rt;
1881 }
1882
1883 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1884                                      struct net_device *dev,
1885                                      unsigned int pref)
1886 {
1887         struct fib6_config cfg = {
1888                 .fc_table       = RT6_TABLE_DFLT,
1889                 .fc_metric      = IP6_RT_PRIO_USER,
1890                 .fc_ifindex     = dev->ifindex,
1891                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1892                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1893                 .fc_nlinfo.portid = 0,
1894                 .fc_nlinfo.nlh = NULL,
1895                 .fc_nlinfo.nl_net = dev_net(dev),
1896         };
1897
1898         cfg.fc_gateway = *gwaddr;
1899
1900         ip6_route_add(&cfg);
1901
1902         return rt6_get_dflt_router(gwaddr, dev);
1903 }
1904
1905 void rt6_purge_dflt_routers(struct net *net)
1906 {
1907         struct rt6_info *rt;
1908         struct fib6_table *table;
1909
1910         /* NOTE: Keep consistent with rt6_get_dflt_router */
1911         table = fib6_get_table(net, RT6_TABLE_DFLT);
1912         if (!table)
1913                 return;
1914
1915 restart:
1916         read_lock_bh(&table->tb6_lock);
1917         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1918                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1919                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1920                         dst_hold(&rt->dst);
1921                         read_unlock_bh(&table->tb6_lock);
1922                         ip6_del_rt(rt);
1923                         goto restart;
1924                 }
1925         }
1926         read_unlock_bh(&table->tb6_lock);
1927 }
1928
1929 static void rtmsg_to_fib6_config(struct net *net,
1930                                  struct in6_rtmsg *rtmsg,
1931                                  struct fib6_config *cfg)
1932 {
1933         memset(cfg, 0, sizeof(*cfg));
1934
1935         cfg->fc_table = RT6_TABLE_MAIN;
1936         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1937         cfg->fc_metric = rtmsg->rtmsg_metric;
1938         cfg->fc_expires = rtmsg->rtmsg_info;
1939         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1940         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1941         cfg->fc_flags = rtmsg->rtmsg_flags;
1942
1943         cfg->fc_nlinfo.nl_net = net;
1944
1945         cfg->fc_dst = rtmsg->rtmsg_dst;
1946         cfg->fc_src = rtmsg->rtmsg_src;
1947         cfg->fc_gateway = rtmsg->rtmsg_gateway;
1948 }
1949
1950 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1951 {
1952         struct fib6_config cfg;
1953         struct in6_rtmsg rtmsg;
1954         int err;
1955
1956         switch(cmd) {
1957         case SIOCADDRT:         /* Add a route */
1958         case SIOCDELRT:         /* Delete a route */
1959                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1960                         return -EPERM;
1961                 err = copy_from_user(&rtmsg, arg,
1962                                      sizeof(struct in6_rtmsg));
1963                 if (err)
1964                         return -EFAULT;
1965
1966                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1967
1968                 rtnl_lock();
1969                 switch (cmd) {
1970                 case SIOCADDRT:
1971                         err = ip6_route_add(&cfg);
1972                         break;
1973                 case SIOCDELRT:
1974                         err = ip6_route_del(&cfg);
1975                         break;
1976                 default:
1977                         err = -EINVAL;
1978                 }
1979                 rtnl_unlock();
1980
1981                 return err;
1982         }
1983
1984         return -EINVAL;
1985 }
1986
1987 /*
1988  *      Drop the packet on the floor
1989  */
1990
1991 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1992 {
1993         int type;
1994         struct dst_entry *dst = skb_dst(skb);
1995         switch (ipstats_mib_noroutes) {
1996         case IPSTATS_MIB_INNOROUTES:
1997                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1998                 if (type == IPV6_ADDR_ANY) {
1999                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2000                                       IPSTATS_MIB_INADDRERRORS);
2001                         break;
2002                 }
2003                 /* FALLTHROUGH */
2004         case IPSTATS_MIB_OUTNOROUTES:
2005                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2006                               ipstats_mib_noroutes);
2007                 break;
2008         }
2009         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2010         kfree_skb(skb);
2011         return 0;
2012 }
2013
2014 static int ip6_pkt_discard(struct sk_buff *skb)
2015 {
2016         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2017 }
2018
2019 static int ip6_pkt_discard_out(struct sk_buff *skb)
2020 {
2021         skb->dev = skb_dst(skb)->dev;
2022         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2023 }
2024
2025 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2026
2027 static int ip6_pkt_prohibit(struct sk_buff *skb)
2028 {
2029         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2030 }
2031
2032 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2033 {
2034         skb->dev = skb_dst(skb)->dev;
2035         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2036 }
2037
2038 #endif
2039
2040 /*
2041  *      Allocate a dst for local (unicast / anycast) address.
2042  */
2043
2044 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2045                                     const struct in6_addr *addr,
2046                                     bool anycast)
2047 {
2048         struct net *net = dev_net(idev->dev);
2049         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2050
2051         if (!rt) {
2052                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2053                 return ERR_PTR(-ENOMEM);
2054         }
2055
2056         in6_dev_hold(idev);
2057
2058         rt->dst.flags |= DST_HOST;
2059         rt->dst.input = ip6_input;
2060         rt->dst.output = ip6_output;
2061         rt->rt6i_idev = idev;
2062
2063         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2064         if (anycast)
2065                 rt->rt6i_flags |= RTF_ANYCAST;
2066         else
2067                 rt->rt6i_flags |= RTF_LOCAL;
2068
2069         rt->rt6i_dst.addr = *addr;
2070         rt->rt6i_dst.plen = 128;
2071         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2072
2073         atomic_set(&rt->dst.__refcnt, 1);
2074
2075         return rt;
2076 }
2077
2078 int ip6_route_get_saddr(struct net *net,
2079                         struct rt6_info *rt,
2080                         const struct in6_addr *daddr,
2081                         unsigned int prefs,
2082                         struct in6_addr *saddr)
2083 {
2084         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2085         int err = 0;
2086         if (rt->rt6i_prefsrc.plen)
2087                 *saddr = rt->rt6i_prefsrc.addr;
2088         else
2089                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2090                                          daddr, prefs, saddr);
2091         return err;
2092 }
2093
/* remove deleted ip from prefsrc entries */
/*
 * Argument bundle handed to fib6_remove_prefsrc() through
 * fib6_clean_all() by rt6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;	/* address compared with rt6i_prefsrc.addr */
};
2100
2101 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2102 {
2103         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2104         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2105         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2106
2107         if (((void *)rt->dst.dev == dev || !dev) &&
2108             rt != net->ipv6.ip6_null_entry &&
2109             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2110                 /* remove prefsrc entry */
2111                 rt->rt6i_prefsrc.plen = 0;
2112         }
2113         return 0;
2114 }
2115
2116 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2117 {
2118         struct net *net = dev_net(ifp->idev->dev);
2119         struct arg_dev_net_ip adni = {
2120                 .dev = ifp->idev->dev,
2121                 .net = net,
2122                 .addr = &ifp->addr,
2123         };
2124         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2125 }
2126
/*
 * Argument bundle handed to fib6_ifdown() by rt6_ifdown().
 */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
};
2131
2132 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2133 {
2134         const struct arg_dev_net *adn = arg;
2135         const struct net_device *dev = adn->dev;
2136
2137         if ((rt->dst.dev == dev || !dev) &&
2138             rt != adn->net->ipv6.ip6_null_entry)
2139                 return -1;
2140
2141         return 0;
2142 }
2143
2144 void rt6_ifdown(struct net *net, struct net_device *dev)
2145 {
2146         struct arg_dev_net adn = {
2147                 .dev = dev,
2148                 .net = net,
2149         };
2150
2151         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2152         icmp6_clean_all(fib6_ifdown, &adn);
2153 }
2154
/*
 * Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change().
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* new MTU to apply to matching routes */
};
2159
2160 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2161 {
2162         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2163         struct inet6_dev *idev;
2164
2165         /* In IPv6 pmtu discovery is not optional,
2166            so that RTAX_MTU lock cannot disable it.
2167            We still use this lock to block changes
2168            caused by addrconf/ndisc.
2169         */
2170
2171         idev = __in6_dev_get(arg->dev);
2172         if (!idev)
2173                 return 0;
2174
2175         /* For administrative MTU increase, there is no way to discover
2176            IPv6 PMTU increase, so PMTU increase should be updated here.
2177            Since RFC 1981 doesn't include administrative MTU increase
2178            update PMTU increase is a MUST. (i.e. jumbo frame)
2179          */
2180         /*
2181            If new MTU is less than route PMTU, this new MTU will be the
2182            lowest MTU in the path, update the route PMTU to reflect PMTU
2183            decreases; if new MTU is greater than route PMTU, and the
2184            old MTU is the lowest MTU in the path, update the route PMTU
2185            to reflect the increase. In this case if the other nodes' MTU
2186            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2187            PMTU discouvery.
2188          */
2189         if (rt->dst.dev == arg->dev &&
2190             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2191             (dst_mtu(&rt->dst) >= arg->mtu ||
2192              (dst_mtu(&rt->dst) < arg->mtu &&
2193               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2194                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2195         }
2196         return 0;
2197 }
2198
2199 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2200 {
2201         struct rt6_mtu_change_arg arg = {
2202                 .dev = dev,
2203                 .mtu = mtu,
2204         };
2205
2206         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2207 }
2208
2209 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2210         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2211         [RTA_OIF]               = { .type = NLA_U32 },
2212         [RTA_IIF]               = { .type = NLA_U32 },
2213         [RTA_PRIORITY]          = { .type = NLA_U32 },
2214         [RTA_METRICS]           = { .type = NLA_NESTED },
2215         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2216 };
2217
2218 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2219                               struct fib6_config *cfg)
2220 {
2221         struct rtmsg *rtm;
2222         struct nlattr *tb[RTA_MAX+1];
2223         int err;
2224
2225         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2226         if (err < 0)
2227                 goto errout;
2228
2229         err = -EINVAL;
2230         rtm = nlmsg_data(nlh);
2231         memset(cfg, 0, sizeof(*cfg));
2232
2233         cfg->fc_table = rtm->rtm_table;
2234         cfg->fc_dst_len = rtm->rtm_dst_len;
2235         cfg->fc_src_len = rtm->rtm_src_len;
2236         cfg->fc_flags = RTF_UP;
2237         cfg->fc_protocol = rtm->rtm_protocol;
2238         cfg->fc_type = rtm->rtm_type;
2239
2240         if (rtm->rtm_type == RTN_UNREACHABLE ||
2241             rtm->rtm_type == RTN_BLACKHOLE ||
2242             rtm->rtm_type == RTN_PROHIBIT ||
2243             rtm->rtm_type == RTN_THROW)
2244                 cfg->fc_flags |= RTF_REJECT;
2245
2246         if (rtm->rtm_type == RTN_LOCAL)
2247                 cfg->fc_flags |= RTF_LOCAL;
2248
2249         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2250         cfg->fc_nlinfo.nlh = nlh;
2251         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2252
2253         if (tb[RTA_GATEWAY]) {
2254                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2255                 cfg->fc_flags |= RTF_GATEWAY;
2256         }
2257
2258         if (tb[RTA_DST]) {
2259                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2260
2261                 if (nla_len(tb[RTA_DST]) < plen)
2262                         goto errout;
2263
2264                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2265         }
2266
2267         if (tb[RTA_SRC]) {
2268                 int plen = (rtm->rtm_src_len + 7) >> 3;
2269
2270                 if (nla_len(tb[RTA_SRC]) < plen)
2271                         goto errout;
2272
2273                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2274         }
2275
2276         if (tb[RTA_PREFSRC])
2277                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2278
2279         if (tb[RTA_OIF])
2280                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2281
2282         if (tb[RTA_PRIORITY])
2283                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2284
2285         if (tb[RTA_METRICS]) {
2286                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2287                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2288         }
2289
2290         if (tb[RTA_TABLE])
2291                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2292
2293         if (tb[RTA_MULTIPATH]) {
2294                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2295                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2296         }
2297
2298         err = 0;
2299 errout:
2300         return err;
2301 }
2302
/*
 *	Add (@add != 0) or delete (@add == 0) every nexthop listed in the
 *	RTA_MULTIPATH payload of @cfg (fc_mp / fc_mp_len), each as an
 *	individual route.
 *
 *	For each nexthop a private copy of @cfg is made, its ifindex and
 *	gateway are overridden from the nexthop entry when given, and
 *	ip6_route_add() or ip6_route_del() is called on the copy.
 *
 *	Returns 0 when every operation succeeded, otherwise the error of
 *	the last operation that failed.
 *
 *	NOTE(review): when an add fails, the loop restarts from the first
 *	nexthop in delete mode and calls ip6_route_del() for every entry
 *	of the request, including entries whose add was never attempted;
 *	errors from those deletes overwrite the original add error in
 *	last_err.  Confirm this is the intended rollback behaviour.
 */
static int ip6_route_multipath(struct fib6_config *cfg, int add)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 0, last_err = 0;

beginning:
	/* (Re)start at the first nexthop of the request. */
	rtnh = (struct rtnexthop *)cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		/* Start from the common configuration for every nexthop. */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			/*
			 * NOTE(review): the nested RTA_GATEWAY is copied
			 * without a length check of its own here - confirm
			 * it is validated elsewhere.
			 */
			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
		if (err) {
			last_err = err;
			/* If we are trying to remove a route, do not stop the
			 * loop when ip6_route_del() fails (because next hop is
			 * already gone), we should try to remove all next hops.
			 */
			if (add) {
				/* If add fails, we should try to delete all
				 * next hops that have been already added.
				 */
				add = 0;
				goto beginning;
			}
		}
		/* Because each route is added like a single route we remove
		 * this flag after the first nexthop (if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it).
		 *
		 * This modifies the flags of the request header itself and
		 * dereferences cfg->fc_nlinfo.nlh unconditionally.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
2357
2358 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2359 {
2360         struct fib6_config cfg;
2361         int err;
2362
2363         err = rtm_to_fib6_config(skb, nlh, &cfg);
2364         if (err < 0)
2365                 return err;
2366
2367         if (cfg.fc_mp)
2368                 return ip6_route_multipath(&cfg, 0);
2369         else
2370                 return ip6_route_del(&cfg);
2371 }
2372
2373 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2374 {
2375         struct fib6_config cfg;
2376         int err;
2377
2378         err = rtm_to_fib6_config(skb, nlh, &cfg);
2379         if (err < 0)
2380                 return err;
2381
2382         if (cfg.fc_mp)
2383                 return ip6_route_multipath(&cfg, 1);
2384         else
2385                 return ip6_route_add(&cfg);
2386 }
2387
2388 static inline size_t rt6_nlmsg_size(void)
2389 {
2390         return NLMSG_ALIGN(sizeof(struct rtmsg))
2391                + nla_total_size(16) /* RTA_SRC */
2392                + nla_total_size(16) /* RTA_DST */
2393                + nla_total_size(16) /* RTA_GATEWAY */
2394                + nla_total_size(16) /* RTA_PREFSRC */
2395                + nla_total_size(4) /* RTA_TABLE */
2396                + nla_total_size(4) /* RTA_IIF */
2397                + nla_total_size(4) /* RTA_OIF */
2398                + nla_total_size(4) /* RTA_PRIORITY */
2399                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2400                + nla_total_size(sizeof(struct rta_cacheinfo));
2401 }
2402
/*
 *	Serialise route @rt into @skb as one rtnetlink route message.
 *
 *	@dst / @src:	when non-NULL, reported as RTA_DST / RTA_SRC with a
 *			prefix length of 128 instead of the route's own keys
 *			(RTA_SRC only with CONFIG_IPV6_SUBTREES).
 *	@iif:		when non-zero, reported as RTA_IIF, or handed to
 *			ip6mr_get_route() for multicast destinations when
 *			CONFIG_IPV6_MROUTE is enabled.
 *	@type, @portid, @seq, @flags: passed to nlmsg_put() for the header.
 *	@prefix:	when non-zero, only routes flagged RTF_PREFIX_RT are
 *			emitted.
 *	@nowait:	passed on to ip6mr_get_route().
 *
 *	Returns 1 when the route is skipped because of @prefix, the
 *	result of nlmsg_end() on success, 0 when ip6mr_get_route()
 *	returned 0 with @nowait unset, and -EMSGSIZE when the header or
 *	an attribute does not fit (the partial message is cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	/* The table id goes both in the header field and in RTA_TABLE. */
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Derive the route type: reject routes are told apart by dst.error. */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* The stored protocol is overridden for redirect and addrconf routes. */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* An explicit @dst is reported as a full /128 destination. */
	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			/*
			 * With CONFIG_IPV6_MROUTE this statement is the
			 * else branch of the multicast test above.
			 */
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* No input interface: report the source address selected for @dst. */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/*
	 * NOTE(review): when @iif is 0, @dst is set and the route has a
	 * preferred source, RTA_PREFSRC has already been added by the
	 * branch above and is added a second time here - confirm this is
	 * intended.
	 */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* Remaining lifetime in jiffies; 0 for routes without RTF_EXPIRES. */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* Undo the partially built message. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2547
2548 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2549 {
2550         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2551         int prefix;
2552
2553         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2554                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2555                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2556         } else
2557                 prefix = 0;
2558
2559         return rt6_fill_node(arg->net,
2560                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2561                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2562                      prefix, 0, NLM_F_MULTI);
2563 }
2564
2565 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2566 {
2567         struct net *net = sock_net(in_skb->sk);
2568         struct nlattr *tb[RTA_MAX+1];
2569         struct rt6_info *rt;
2570         struct sk_buff *skb;
2571         struct rtmsg *rtm;
2572         struct flowi6 fl6;
2573         int err, iif = 0, oif = 0;
2574
2575         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2576         if (err < 0)
2577                 goto errout;
2578
2579         err = -EINVAL;
2580         memset(&fl6, 0, sizeof(fl6));
2581
2582         if (tb[RTA_SRC]) {
2583                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2584                         goto errout;
2585
2586                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2587         }
2588
2589         if (tb[RTA_DST]) {
2590                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2591                         goto errout;
2592
2593                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2594         }
2595
2596         if (tb[RTA_IIF])
2597                 iif = nla_get_u32(tb[RTA_IIF]);
2598
2599         if (tb[RTA_OIF])
2600                 oif = nla_get_u32(tb[RTA_OIF]);
2601
2602         if (iif) {
2603                 struct net_device *dev;
2604                 int flags = 0;
2605
2606                 dev = __dev_get_by_index(net, iif);
2607                 if (!dev) {
2608                         err = -ENODEV;
2609                         goto errout;
2610                 }
2611
2612                 fl6.flowi6_iif = iif;
2613
2614                 if (!ipv6_addr_any(&fl6.saddr))
2615                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2616
2617                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2618                                                                flags);
2619         } else {
2620                 fl6.flowi6_oif = oif;
2621
2622                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2623         }
2624
2625         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2626         if (!skb) {
2627                 ip6_rt_put(rt);
2628                 err = -ENOBUFS;
2629                 goto errout;
2630         }
2631
2632         /* Reserve room for dummy headers, this skb can pass
2633            through good chunk of routing engine.
2634          */
2635         skb_reset_mac_header(skb);
2636         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2637
2638         skb_dst_set(skb, &rt->dst);
2639
2640         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2641                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2642                             nlh->nlmsg_seq, 0, 0, 0);
2643         if (err < 0) {
2644                 kfree_skb(skb);
2645                 goto errout;
2646         }
2647
2648         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2649 errout:
2650         return err;
2651 }
2652
2653 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2654 {
2655         struct sk_buff *skb;
2656         struct net *net = info->nl_net;
2657         u32 seq;
2658         int err;
2659
2660         err = -ENOBUFS;
2661         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2662
2663         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2664         if (!skb)
2665                 goto errout;
2666
2667         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2668                                 event, info->portid, seq, 0, 0, 0);
2669         if (err < 0) {
2670                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2671                 WARN_ON(err == -EMSGSIZE);
2672                 kfree_skb(skb);
2673                 goto errout;
2674         }
2675         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2676                     info->nlh, gfp_any());
2677         return;
2678 errout:
2679         if (err < 0)
2680                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2681 }
2682
2683 static int ip6_route_dev_notify(struct notifier_block *this,
2684                                 unsigned long event, void *data)
2685 {
2686         struct net_device *dev = (struct net_device *)data;
2687         struct net *net = dev_net(dev);
2688
2689         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2690                 net->ipv6.ip6_null_entry->dst.dev = dev;
2691                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2692 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2693                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2694                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2695                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2696                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2697 #endif
2698         }
2699
2700         return NOTIFY_OK;
2701 }
2702
2703 /*
2704  *      /proc
2705  */
2706
2707 #ifdef CONFIG_PROC_FS
2708
/*
 * Buffer/cursor bookkeeping for /proc output.
 *
 * NOTE(review): none of the seq_file based handlers in this part of the
 * file reference this structure; field meanings below are inferred from
 * their names only -- confirm before relying on them.
 */
struct rt6_proc_arg {
	char *buffer;	/* presumably the output buffer */
	int offset;	/* presumably the requested start offset */
	int length;	/* presumably the requested length */
	int skip;	/* presumably bytes still to skip */
	int len;	/* presumably bytes produced so far */
};
2717
2718 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2719 {
2720         struct seq_file *m = p_arg;
2721
2722         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2723
2724 #ifdef CONFIG_IPV6_SUBTREES
2725         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2726 #else
2727         seq_puts(m, "00000000000000000000000000000000 00 ");
2728 #endif
2729         if (rt->rt6i_flags & RTF_GATEWAY) {
2730                 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2731         } else {
2732                 seq_puts(m, "00000000000000000000000000000000");
2733         }
2734         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2735                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2736                    rt->dst.__use, rt->rt6i_flags,
2737                    rt->dst.dev ? rt->dst.dev->name : "");
2738         return 0;
2739 }
2740
2741 static int ipv6_route_show(struct seq_file *m, void *v)
2742 {
2743         struct net *net = (struct net *)m->private;
2744         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2745         return 0;
2746 }
2747
2748 static int ipv6_route_open(struct inode *inode, struct file *file)
2749 {
2750         return single_open_net(inode, file, ipv6_route_show);
2751 }
2752
2753 static const struct file_operations ipv6_route_proc_fops = {
2754         .owner          = THIS_MODULE,
2755         .open           = ipv6_route_open,
2756         .read           = seq_read,
2757         .llseek         = seq_lseek,
2758         .release        = single_release_net,
2759 };
2760
2761 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2762 {
2763         struct net *net = (struct net *)seq->private;
2764         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2765                    net->ipv6.rt6_stats->fib_nodes,
2766                    net->ipv6.rt6_stats->fib_route_nodes,
2767                    net->ipv6.rt6_stats->fib_rt_alloc,
2768                    net->ipv6.rt6_stats->fib_rt_entries,
2769                    net->ipv6.rt6_stats->fib_rt_cache,
2770                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2771                    net->ipv6.rt6_stats->fib_discarded_routes);
2772
2773         return 0;
2774 }
2775
2776 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2777 {
2778         return single_open_net(inode, file, rt6_stats_seq_show);
2779 }
2780
2781 static const struct file_operations rt6_stats_seq_fops = {
2782         .owner   = THIS_MODULE,
2783         .open    = rt6_stats_seq_open,
2784         .read    = seq_read,
2785         .llseek  = seq_lseek,
2786         .release = single_release_net,
2787 };
2788 #endif  /* CONFIG_PROC_FS */
2789
2790 #ifdef CONFIG_SYSCTL
2791
2792 static
2793 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2794                               void __user *buffer, size_t *lenp, loff_t *ppos)
2795 {
2796         struct net *net;
2797         int delay;
2798         if (!write)
2799                 return -EINVAL;
2800
2801         net = (struct net *)ctl->extra1;
2802         delay = net->ipv6.sysctl.flush_delay;
2803         proc_dointvec(ctl, write, buffer, lenp, ppos);
2804         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2805         return 0;
2806 }
2807
2808 ctl_table ipv6_route_table_template[] = {
2809         {
2810                 .procname       =       "flush",
2811                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2812                 .maxlen         =       sizeof(int),
2813                 .mode           =       0200,
2814                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2815         },
2816         {
2817                 .procname       =       "gc_thresh",
2818                 .data           =       &ip6_dst_ops_template.gc_thresh,
2819                 .maxlen         =       sizeof(int),
2820                 .mode           =       0644,
2821                 .proc_handler   =       proc_dointvec,
2822         },
2823         {
2824                 .procname       =       "max_size",
2825                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2826                 .maxlen         =       sizeof(int),
2827                 .mode           =       0644,
2828                 .proc_handler   =       proc_dointvec,
2829         },
2830         {
2831                 .procname       =       "gc_min_interval",
2832                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2833                 .maxlen         =       sizeof(int),
2834                 .mode           =       0644,
2835                 .proc_handler   =       proc_dointvec_jiffies,
2836         },
2837         {
2838                 .procname       =       "gc_timeout",
2839                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2840                 .maxlen         =       sizeof(int),
2841                 .mode           =       0644,
2842                 .proc_handler   =       proc_dointvec_jiffies,
2843         },
2844         {
2845                 .procname       =       "gc_interval",
2846                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2847                 .maxlen         =       sizeof(int),
2848                 .mode           =       0644,
2849                 .proc_handler   =       proc_dointvec_jiffies,
2850         },
2851         {
2852                 .procname       =       "gc_elasticity",
2853                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2854                 .maxlen         =       sizeof(int),
2855                 .mode           =       0644,
2856                 .proc_handler   =       proc_dointvec,
2857         },
2858         {
2859                 .procname       =       "mtu_expires",
2860                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2861                 .maxlen         =       sizeof(int),
2862                 .mode           =       0644,
2863                 .proc_handler   =       proc_dointvec_jiffies,
2864         },
2865         {
2866                 .procname       =       "min_adv_mss",
2867                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2868                 .maxlen         =       sizeof(int),
2869                 .mode           =       0644,
2870                 .proc_handler   =       proc_dointvec,
2871         },
2872         {
2873                 .procname       =       "gc_min_interval_ms",
2874                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2875                 .maxlen         =       sizeof(int),
2876                 .mode           =       0644,
2877                 .proc_handler   =       proc_dointvec_ms_jiffies,
2878         },
2879         { }
2880 };
2881
2882 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2883 {
2884         struct ctl_table *table;
2885
2886         table = kmemdup(ipv6_route_table_template,
2887                         sizeof(ipv6_route_table_template),
2888                         GFP_KERNEL);
2889
2890         if (table) {
2891                 table[0].data = &net->ipv6.sysctl.flush_delay;
2892                 table[0].extra1 = net;
2893                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2894                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2895                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2896                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2897                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2898                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2899                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2900                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2901                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2902
2903                 /* Don't export sysctls to unprivileged users */
2904                 if (net->user_ns != &init_user_ns)
2905                         table[0].procname = NULL;
2906         }
2907
2908         return table;
2909 }
2910 #endif
2911
2912 static int __net_init ip6_route_net_init(struct net *net)
2913 {
2914         int ret = -ENOMEM;
2915
2916         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2917                sizeof(net->ipv6.ip6_dst_ops));
2918
2919         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2920                 goto out_ip6_dst_ops;
2921
2922         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2923                                            sizeof(*net->ipv6.ip6_null_entry),
2924                                            GFP_KERNEL);
2925         if (!net->ipv6.ip6_null_entry)
2926                 goto out_ip6_dst_entries;
2927         net->ipv6.ip6_null_entry->dst.path =
2928                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2929         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2930         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2931                          ip6_template_metrics, true);
2932
2933 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2934         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2935                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2936                                                GFP_KERNEL);
2937         if (!net->ipv6.ip6_prohibit_entry)
2938                 goto out_ip6_null_entry;
2939         net->ipv6.ip6_prohibit_entry->dst.path =
2940                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2941         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2942         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2943                          ip6_template_metrics, true);
2944
2945         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2946                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2947                                                GFP_KERNEL);
2948         if (!net->ipv6.ip6_blk_hole_entry)
2949                 goto out_ip6_prohibit_entry;
2950         net->ipv6.ip6_blk_hole_entry->dst.path =
2951                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2952         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2953         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2954                          ip6_template_metrics, true);
2955 #endif
2956
2957         net->ipv6.sysctl.flush_delay = 0;
2958         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2959         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2960         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2961         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2962         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2963         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2964         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2965
2966         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2967
2968         ret = 0;
2969 out:
2970         return ret;
2971
2972 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2973 out_ip6_prohibit_entry:
2974         kfree(net->ipv6.ip6_prohibit_entry);
2975 out_ip6_null_entry:
2976         kfree(net->ipv6.ip6_null_entry);
2977 #endif
2978 out_ip6_dst_entries:
2979         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2980 out_ip6_dst_ops:
2981         goto out;
2982 }
2983
2984 static void __net_exit ip6_route_net_exit(struct net *net)
2985 {
2986         kfree(net->ipv6.ip6_null_entry);
2987 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2988         kfree(net->ipv6.ip6_prohibit_entry);
2989         kfree(net->ipv6.ip6_blk_hole_entry);
2990 #endif
2991         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2992 }
2993
2994 static int __net_init ip6_route_net_init_late(struct net *net)
2995 {
2996 #ifdef CONFIG_PROC_FS
2997         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
2998         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
2999 #endif
3000         return 0;
3001 }
3002
3003 static void __net_exit ip6_route_net_exit_late(struct net *net)
3004 {
3005 #ifdef CONFIG_PROC_FS
3006         remove_proc_entry("ipv6_route", net->proc_net);
3007         remove_proc_entry("rt6_stats", net->proc_net);
3008 #endif
3009 }
3010
3011 static struct pernet_operations ip6_route_net_ops = {
3012         .init = ip6_route_net_init,
3013         .exit = ip6_route_net_exit,
3014 };
3015
3016 static int __net_init ipv6_inetpeer_init(struct net *net)
3017 {
3018         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3019
3020         if (!bp)
3021                 return -ENOMEM;
3022         inet_peer_base_init(bp);
3023         net->ipv6.peers = bp;
3024         return 0;
3025 }
3026
3027 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3028 {
3029         struct inet_peer_base *bp = net->ipv6.peers;
3030
3031         net->ipv6.peers = NULL;
3032         inetpeer_invalidate_tree(bp);
3033         kfree(bp);
3034 }
3035
3036 static struct pernet_operations ipv6_inetpeer_ops = {
3037         .init   =       ipv6_inetpeer_init,
3038         .exit   =       ipv6_inetpeer_exit,
3039 };
3040
3041 static struct pernet_operations ip6_route_net_late_ops = {
3042         .init = ip6_route_net_init_late,
3043         .exit = ip6_route_net_exit_late,
3044 };
3045
3046 static struct notifier_block ip6_route_dev_notifier = {
3047         .notifier_call = ip6_route_dev_notify,
3048         .priority = 0,
3049 };
3050
3051 int __init ip6_route_init(void)
3052 {
3053         int ret;
3054
3055         ret = -ENOMEM;
3056         ip6_dst_ops_template.kmem_cachep =
3057                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3058                                   SLAB_HWCACHE_ALIGN, NULL);
3059         if (!ip6_dst_ops_template.kmem_cachep)
3060                 goto out;
3061
3062         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3063         if (ret)
3064                 goto out_kmem_cache;
3065
3066         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3067         if (ret)
3068                 goto out_dst_entries;
3069
3070         ret = register_pernet_subsys(&ip6_route_net_ops);
3071         if (ret)
3072                 goto out_register_inetpeer;
3073
3074         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3075
3076         /* Registering of the loopback is done before this portion of code,
3077          * the loopback reference in rt6_info will not be taken, do it
3078          * manually for init_net */
3079         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3080         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3081   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3082         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3083         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3084         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3085         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3086   #endif
3087         ret = fib6_init();
3088         if (ret)
3089                 goto out_register_subsys;
3090
3091         ret = xfrm6_init();
3092         if (ret)
3093                 goto out_fib6_init;
3094
3095         ret = fib6_rules_init();
3096         if (ret)
3097                 goto xfrm6_init;
3098
3099         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3100         if (ret)
3101                 goto fib6_rules_init;
3102
3103         ret = -ENOBUFS;
3104         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3105             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3106             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3107                 goto out_register_late_subsys;
3108
3109         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3110         if (ret)
3111                 goto out_register_late_subsys;
3112
3113 out:
3114         return ret;
3115
3116 out_register_late_subsys:
3117         unregister_pernet_subsys(&ip6_route_net_late_ops);
3118 fib6_rules_init:
3119         fib6_rules_cleanup();
3120 xfrm6_init:
3121         xfrm6_fini();
3122 out_fib6_init:
3123         fib6_gc_cleanup();
3124 out_register_subsys:
3125         unregister_pernet_subsys(&ip6_route_net_ops);
3126 out_register_inetpeer:
3127         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3128 out_dst_entries:
3129         dst_entries_destroy(&ip6_dst_blackhole_ops);
3130 out_kmem_cache:
3131         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3132         goto out;
3133 }
3134
3135 void ip6_route_cleanup(void)
3136 {
3137         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3138         unregister_pernet_subsys(&ip6_route_net_late_ops);
3139         fib6_rules_cleanup();
3140         xfrm6_fini();
3141         fib6_gc_cleanup();
3142         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3143         unregister_pernet_subsys(&ip6_route_net_ops);
3144         dst_entries_destroy(&ip6_dst_blackhole_ops);
3145         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3146 }