netlink: Rename pid to portid to avoid confusion
[linux-3.10.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68                                     const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int      ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void             ip6_dst_destroy(struct dst_entry *);
74 static void             ip6_dst_ifdown(struct dst_entry *,
75                                        struct net_device *dev, int how);
76 static int               ip6_dst_gc(struct dst_ops *ops);
77
78 static int              ip6_pkt_discard(struct sk_buff *skb);
79 static int              ip6_pkt_discard_out(struct sk_buff *skb);
80 static void             ip6_link_failure(struct sk_buff *skb);
81 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82                                            struct sk_buff *skb, u32 mtu);
83 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84                                         struct sk_buff *skb);
85
86 #ifdef CONFIG_IPV6_ROUTE_INFO
87 static struct rt6_info *rt6_add_route_info(struct net *net,
88                                            const struct in6_addr *prefix, int prefixlen,
89                                            const struct in6_addr *gwaddr, int ifindex,
90                                            unsigned int pref);
91 static struct rt6_info *rt6_get_route_info(struct net *net,
92                                            const struct in6_addr *prefix, int prefixlen,
93                                            const struct in6_addr *gwaddr, int ifindex);
94 #endif
95
96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97 {
98         struct rt6_info *rt = (struct rt6_info *) dst;
99         struct inet_peer *peer;
100         u32 *p = NULL;
101
102         if (!(rt->dst.flags & DST_HOST))
103                 return NULL;
104
105         peer = rt6_get_peer_create(rt);
106         if (peer) {
107                 u32 *old_p = __DST_METRICS_PTR(old);
108                 unsigned long prev, new;
109
110                 p = peer->metrics;
111                 if (inet_metrics_new(peer))
112                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114                 new = (unsigned long) p;
115                 prev = cmpxchg(&dst->_metrics, old, new);
116
117                 if (prev != old) {
118                         p = __DST_METRICS_PTR(prev);
119                         if (prev & DST_METRICS_READ_ONLY)
120                                 p = NULL;
121                 }
122         }
123         return p;
124 }
125
126 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127                                              struct sk_buff *skb,
128                                              const void *daddr)
129 {
130         struct in6_addr *p = &rt->rt6i_gateway;
131
132         if (!ipv6_addr_any(p))
133                 return (const void *) p;
134         else if (skb)
135                 return &ipv6_hdr(skb)->daddr;
136         return daddr;
137 }
138
139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140                                           struct sk_buff *skb,
141                                           const void *daddr)
142 {
143         struct rt6_info *rt = (struct rt6_info *) dst;
144         struct neighbour *n;
145
146         daddr = choose_neigh_daddr(rt, skb, daddr);
147         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148         if (n)
149                 return n;
150         return neigh_create(&nd_tbl, daddr, dst->dev);
151 }
152
153 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
154 {
155         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156         if (!n) {
157                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158                 if (IS_ERR(n))
159                         return PTR_ERR(n);
160         }
161         rt->n = n;
162
163         return 0;
164 }
165
166 static struct dst_ops ip6_dst_ops_template = {
167         .family                 =       AF_INET6,
168         .protocol               =       cpu_to_be16(ETH_P_IPV6),
169         .gc                     =       ip6_dst_gc,
170         .gc_thresh              =       1024,
171         .check                  =       ip6_dst_check,
172         .default_advmss         =       ip6_default_advmss,
173         .mtu                    =       ip6_mtu,
174         .cow_metrics            =       ipv6_cow_metrics,
175         .destroy                =       ip6_dst_destroy,
176         .ifdown                 =       ip6_dst_ifdown,
177         .negative_advice        =       ip6_negative_advice,
178         .link_failure           =       ip6_link_failure,
179         .update_pmtu            =       ip6_rt_update_pmtu,
180         .redirect               =       rt6_do_redirect,
181         .local_out              =       __ip6_local_out,
182         .neigh_lookup           =       ip6_neigh_lookup,
183 };
184
185 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
186 {
187         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189         return mtu ? : dst->dev->mtu;
190 }
191
192 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193                                          struct sk_buff *skb, u32 mtu)
194 {
195 }
196
/* redirect callback for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
201
202 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203                                          unsigned long old)
204 {
205         return NULL;
206 }
207
208 static struct dst_ops ip6_dst_blackhole_ops = {
209         .family                 =       AF_INET6,
210         .protocol               =       cpu_to_be16(ETH_P_IPV6),
211         .destroy                =       ip6_dst_destroy,
212         .check                  =       ip6_dst_check,
213         .mtu                    =       ip6_blackhole_mtu,
214         .default_advmss         =       ip6_default_advmss,
215         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
216         .redirect               =       ip6_rt_blackhole_redirect,
217         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
218         .neigh_lookup           =       ip6_neigh_lookup,
219 };
220
221 static const u32 ip6_template_metrics[RTAX_MAX] = {
222         [RTAX_HOPLIMIT - 1] = 255,
223 };
224
225 static struct rt6_info ip6_null_entry_template = {
226         .dst = {
227                 .__refcnt       = ATOMIC_INIT(1),
228                 .__use          = 1,
229                 .obsolete       = -1,
230                 .error          = -ENETUNREACH,
231                 .input          = ip6_pkt_discard,
232                 .output         = ip6_pkt_discard_out,
233         },
234         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
235         .rt6i_protocol  = RTPROT_KERNEL,
236         .rt6i_metric    = ~(u32) 0,
237         .rt6i_ref       = ATOMIC_INIT(1),
238 };
239
240 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
241
242 static int ip6_pkt_prohibit(struct sk_buff *skb);
243 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
244
245 static struct rt6_info ip6_prohibit_entry_template = {
246         .dst = {
247                 .__refcnt       = ATOMIC_INIT(1),
248                 .__use          = 1,
249                 .obsolete       = -1,
250                 .error          = -EACCES,
251                 .input          = ip6_pkt_prohibit,
252                 .output         = ip6_pkt_prohibit_out,
253         },
254         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
255         .rt6i_protocol  = RTPROT_KERNEL,
256         .rt6i_metric    = ~(u32) 0,
257         .rt6i_ref       = ATOMIC_INIT(1),
258 };
259
260 static struct rt6_info ip6_blk_hole_entry_template = {
261         .dst = {
262                 .__refcnt       = ATOMIC_INIT(1),
263                 .__use          = 1,
264                 .obsolete       = -1,
265                 .error          = -EINVAL,
266                 .input          = dst_discard,
267                 .output         = dst_discard,
268         },
269         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
270         .rt6i_protocol  = RTPROT_KERNEL,
271         .rt6i_metric    = ~(u32) 0,
272         .rt6i_ref       = ATOMIC_INIT(1),
273 };
274
275 #endif
276
277 /* allocate dst with ip6_dst_ops */
278 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
279                                              struct net_device *dev,
280                                              int flags,
281                                              struct fib6_table *table)
282 {
283         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284                                         0, DST_OBSOLETE_NONE, flags);
285
286         if (rt) {
287                 struct dst_entry *dst = &rt->dst;
288
289                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291         }
292         return rt;
293 }
294
295 static void ip6_dst_destroy(struct dst_entry *dst)
296 {
297         struct rt6_info *rt = (struct rt6_info *)dst;
298         struct inet6_dev *idev = rt->rt6i_idev;
299
300         if (rt->n)
301                 neigh_release(rt->n);
302
303         if (!(rt->dst.flags & DST_HOST))
304                 dst_destroy_metrics_generic(dst);
305
306         if (idev) {
307                 rt->rt6i_idev = NULL;
308                 in6_dev_put(idev);
309         }
310
311         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
312                 dst_release(dst->from);
313
314         if (rt6_has_peer(rt)) {
315                 struct inet_peer *peer = rt6_peer_ptr(rt);
316                 inet_putpeer(peer);
317         }
318 }
319
320 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
321
322 static u32 rt6_peer_genid(void)
323 {
324         return atomic_read(&__rt6_peer_genid);
325 }
326
327 void rt6_bind_peer(struct rt6_info *rt, int create)
328 {
329         struct inet_peer_base *base;
330         struct inet_peer *peer;
331
332         base = inetpeer_base_ptr(rt->_rt6i_peer);
333         if (!base)
334                 return;
335
336         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
337         if (peer) {
338                 if (!rt6_set_peer(rt, peer))
339                         inet_putpeer(peer);
340                 else
341                         rt->rt6i_peer_genid = rt6_peer_genid();
342         }
343 }
344
345 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
346                            int how)
347 {
348         struct rt6_info *rt = (struct rt6_info *)dst;
349         struct inet6_dev *idev = rt->rt6i_idev;
350         struct net_device *loopback_dev =
351                 dev_net(dev)->loopback_dev;
352
353         if (dev != loopback_dev) {
354                 if (idev && idev->dev == dev) {
355                         struct inet6_dev *loopback_idev =
356                                 in6_dev_get(loopback_dev);
357                         if (loopback_idev) {
358                                 rt->rt6i_idev = loopback_idev;
359                                 in6_dev_put(idev);
360                         }
361                 }
362                 if (rt->n && rt->n->dev == dev) {
363                         rt->n->dev = loopback_dev;
364                         dev_hold(loopback_dev);
365                         dev_put(dev);
366                 }
367         }
368 }
369
370 static bool rt6_check_expired(const struct rt6_info *rt)
371 {
372         struct rt6_info *ort = NULL;
373
374         if (rt->rt6i_flags & RTF_EXPIRES) {
375                 if (time_after(jiffies, rt->dst.expires))
376                         return true;
377         } else if (rt->dst.from) {
378                 ort = (struct rt6_info *) rt->dst.from;
379                 return (ort->rt6i_flags & RTF_EXPIRES) &&
380                         time_after(jiffies, ort->dst.expires);
381         }
382         return false;
383 }
384
385 static bool rt6_need_strict(const struct in6_addr *daddr)
386 {
387         return ipv6_addr_type(daddr) &
388                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
389 }
390
391 /*
392  *      Route lookup. Any table->tb6_lock is implied.
393  */
394
395 static inline struct rt6_info *rt6_device_match(struct net *net,
396                                                     struct rt6_info *rt,
397                                                     const struct in6_addr *saddr,
398                                                     int oif,
399                                                     int flags)
400 {
401         struct rt6_info *local = NULL;
402         struct rt6_info *sprt;
403
404         if (!oif && ipv6_addr_any(saddr))
405                 goto out;
406
407         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
408                 struct net_device *dev = sprt->dst.dev;
409
410                 if (oif) {
411                         if (dev->ifindex == oif)
412                                 return sprt;
413                         if (dev->flags & IFF_LOOPBACK) {
414                                 if (!sprt->rt6i_idev ||
415                                     sprt->rt6i_idev->dev->ifindex != oif) {
416                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
417                                                 continue;
418                                         if (local && (!oif ||
419                                                       local->rt6i_idev->dev->ifindex == oif))
420                                                 continue;
421                                 }
422                                 local = sprt;
423                         }
424                 } else {
425                         if (ipv6_chk_addr(net, saddr, dev,
426                                           flags & RT6_LOOKUP_F_IFACE))
427                                 return sprt;
428                 }
429         }
430
431         if (oif) {
432                 if (local)
433                         return local;
434
435                 if (flags & RT6_LOOKUP_F_IFACE)
436                         return net->ipv6.ip6_null_entry;
437         }
438 out:
439         return rt;
440 }
441
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation for the
 * route's neighbour when it is not in a NUD_VALID state and the last
 * update is older than the interface's rtr_probe_interval.
 *
 * neigh->updated doubles as the rate-limit timestamp, so the check and
 * the timestamp refresh are done with neigh->lock held for writing.
 * Holding it only for reading would let concurrent callers all pass
 * the time check and each send a probe.  The solicitation itself is
 * sent after the lock has been dropped.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	bool send_probe = false;

	rcu_read_lock();
	neigh = rt ? rt->n : NULL;
	/* Cheap unlocked pre-check; repeated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		send_probe = true;
	}
	write_unlock_bh(&neigh->lock);

	if (send_probe) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
481
482 /*
483  * Default Router Selection (RFC 2461 6.3.6)
484  */
485 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
486 {
487         struct net_device *dev = rt->dst.dev;
488         if (!oif || dev->ifindex == oif)
489                 return 2;
490         if ((dev->flags & IFF_LOOPBACK) &&
491             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
492                 return 1;
493         return 0;
494 }
495
496 static inline int rt6_check_neigh(struct rt6_info *rt)
497 {
498         struct neighbour *neigh;
499         int m;
500
501         rcu_read_lock();
502         neigh = rt->n;
503         if (rt->rt6i_flags & RTF_NONEXTHOP ||
504             !(rt->rt6i_flags & RTF_GATEWAY))
505                 m = 1;
506         else if (neigh) {
507                 read_lock_bh(&neigh->lock);
508                 if (neigh->nud_state & NUD_VALID)
509                         m = 2;
510 #ifdef CONFIG_IPV6_ROUTER_PREF
511                 else if (neigh->nud_state & NUD_FAILED)
512                         m = 0;
513 #endif
514                 else
515                         m = 1;
516                 read_unlock_bh(&neigh->lock);
517         } else
518                 m = 0;
519         rcu_read_unlock();
520         return m;
521 }
522
523 static int rt6_score_route(struct rt6_info *rt, int oif,
524                            int strict)
525 {
526         int m, n;
527
528         m = rt6_check_dev(rt, oif);
529         if (!m && (strict & RT6_LOOKUP_F_IFACE))
530                 return -1;
531 #ifdef CONFIG_IPV6_ROUTER_PREF
532         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
533 #endif
534         n = rt6_check_neigh(rt);
535         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
536                 return -1;
537         return m;
538 }
539
540 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
541                                    int *mpri, struct rt6_info *match)
542 {
543         int m;
544
545         if (rt6_check_expired(rt))
546                 goto out;
547
548         m = rt6_score_route(rt, oif, strict);
549         if (m < 0)
550                 goto out;
551
552         if (m > *mpri) {
553                 if (strict & RT6_LOOKUP_F_REACHABLE)
554                         rt6_probe(match);
555                 *mpri = m;
556                 match = rt;
557         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
558                 rt6_probe(rt);
559         }
560
561 out:
562         return match;
563 }
564
565 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
566                                      struct rt6_info *rr_head,
567                                      u32 metric, int oif, int strict)
568 {
569         struct rt6_info *rt, *match;
570         int mpri = -1;
571
572         match = NULL;
573         for (rt = rr_head; rt && rt->rt6i_metric == metric;
574              rt = rt->dst.rt6_next)
575                 match = find_match(rt, oif, strict, &mpri, match);
576         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
577              rt = rt->dst.rt6_next)
578                 match = find_match(rt, oif, strict, &mpri, match);
579
580         return match;
581 }
582
583 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
584 {
585         struct rt6_info *match, *rt0;
586         struct net *net;
587
588         rt0 = fn->rr_ptr;
589         if (!rt0)
590                 fn->rr_ptr = rt0 = fn->leaf;
591
592         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
593
594         if (!match &&
595             (strict & RT6_LOOKUP_F_REACHABLE)) {
596                 struct rt6_info *next = rt0->dst.rt6_next;
597
598                 /* no entries matched; do round-robin */
599                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
600                         next = fn->leaf;
601
602                 if (next != rt0)
603                         fn->rr_ptr = next;
604         }
605
606         net = dev_net(rt0->dst.dev);
607         return match ? match : net->ipv6.ip6_null_entry;
608 }
609
610 #ifdef CONFIG_IPV6_ROUTE_INFO
611 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
612                   const struct in6_addr *gwaddr)
613 {
614         struct net *net = dev_net(dev);
615         struct route_info *rinfo = (struct route_info *) opt;
616         struct in6_addr prefix_buf, *prefix;
617         unsigned int pref;
618         unsigned long lifetime;
619         struct rt6_info *rt;
620
621         if (len < sizeof(struct route_info)) {
622                 return -EINVAL;
623         }
624
625         /* Sanity check for prefix_len and length */
626         if (rinfo->length > 3) {
627                 return -EINVAL;
628         } else if (rinfo->prefix_len > 128) {
629                 return -EINVAL;
630         } else if (rinfo->prefix_len > 64) {
631                 if (rinfo->length < 2) {
632                         return -EINVAL;
633                 }
634         } else if (rinfo->prefix_len > 0) {
635                 if (rinfo->length < 1) {
636                         return -EINVAL;
637                 }
638         }
639
640         pref = rinfo->route_pref;
641         if (pref == ICMPV6_ROUTER_PREF_INVALID)
642                 return -EINVAL;
643
644         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
645
646         if (rinfo->length == 3)
647                 prefix = (struct in6_addr *)rinfo->prefix;
648         else {
649                 /* this function is safe */
650                 ipv6_addr_prefix(&prefix_buf,
651                                  (struct in6_addr *)rinfo->prefix,
652                                  rinfo->prefix_len);
653                 prefix = &prefix_buf;
654         }
655
656         rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
657                                 dev->ifindex);
658
659         if (rt && !lifetime) {
660                 ip6_del_rt(rt);
661                 rt = NULL;
662         }
663
664         if (!rt && lifetime)
665                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
666                                         pref);
667         else if (rt)
668                 rt->rt6i_flags = RTF_ROUTEINFO |
669                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
670
671         if (rt) {
672                 if (!addrconf_finite_timeout(lifetime))
673                         rt6_clean_expires(rt);
674                 else
675                         rt6_set_expires(rt, jiffies + HZ * lifetime);
676
677                 dst_release(&rt->dst);
678         }
679         return 0;
680 }
681 #endif
682
/*
 * BACKTRACK(__net, saddr) - retry a failed lookup higher up the tree.
 *
 * This macro is not self-contained: it reads and writes the caller's
 * local variables `rt` and `fn`, and it jumps to the caller's labels
 * `out` and `restart`.
 *
 * It only acts when `rt` is __net's null entry.  In that case it walks
 * from `fn` towards the root: at each step it either moves to the
 * parent, or - when the parent has a subtree that is not `fn` itself -
 * looks `saddr` up in that subtree.  The walk ends with `goto out` when
 * the current node is flagged RTN_TL_ROOT, or with `goto restart` once
 * a node flagged RTN_RTINFO has been reached.
 */
683 #define BACKTRACK(__net, saddr)                 \
684 do { \
685         if (rt == __net->ipv6.ip6_null_entry) { \
686                 struct fib6_node *pn; \
687                 while (1) { \
688                         if (fn->fn_flags & RTN_TL_ROOT) \
689                                 goto out; \
690                         pn = fn->parent; \
691                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
692                                 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
693                         else \
694                                 fn = pn; \
695                         if (fn->fn_flags & RTN_RTINFO) \
696                                 goto restart; \
697                 } \
698         } \
699 } while (0)
700
/*
 * Plain route lookup in @table for the flow @fl6.
 *
 * Runs entirely under table->tb6_lock (read side).  The node found by
 * fib6_lookup() is filtered through rt6_device_match(); when that
 * yields the null entry, BACKTRACK() moves `fn` up the tree and jumps
 * back to `restart`, or gives up via `out`.  The result is marked as
 * used with dst_use() before the lock is dropped.
 */
701 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
702                                              struct fib6_table *table,
703                                              struct flowi6 *fl6, int flags)
704 {
705         struct fib6_node *fn;
706         struct rt6_info *rt;
707
708         read_lock_bh(&table->tb6_lock);
709         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
        /* `restart` and `out` are the jump targets used by BACKTRACK(). */
710 restart:
711         rt = fn->leaf;
712         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
713         BACKTRACK(net, &fl6->saddr);
714 out:
715         dst_use(&rt->dst, jiffies);
716         read_unlock_bh(&table->tb6_lock);
717         return rt;
718
719 }
720
721 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
722                                     int flags)
723 {
724         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
725 }
726 EXPORT_SYMBOL_GPL(ip6_route_lookup);
727
728 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
729                             const struct in6_addr *saddr, int oif, int strict)
730 {
731         struct flowi6 fl6 = {
732                 .flowi6_oif = oif,
733                 .daddr = *daddr,
734         };
735         struct dst_entry *dst;
736         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
737
738         if (saddr) {
739                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
740                 flags |= RT6_LOOKUP_F_HAS_SADDR;
741         }
742
743         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
744         if (dst->error == 0)
745                 return (struct rt6_info *) dst;
746
747         dst_release(dst);
748
749         return NULL;
750 }
751
752 EXPORT_SYMBOL(rt6_lookup);
753
754 /* ip6_ins_rt is called with table->tb6_lock NOT held.
755    It takes a new route entry; if the addition fails for any reason,
756    the route is freed. In any case, if the caller does not hold a
757    reference to it, it may be destroyed.
758  */
759
760 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
761 {
762         int err;
763         struct fib6_table *table;
764
765         table = rt->rt6i_table;
766         write_lock_bh(&table->tb6_lock);
767         err = fib6_add(&table->tb6_root, rt, info);
768         write_unlock_bh(&table->tb6_lock);
769
770         return err;
771 }
772
773 int ip6_ins_rt(struct rt6_info *rt)
774 {
775         struct nl_info info = {
776                 .nl_net = dev_net(rt->dst.dev),
777         };
778         return __ip6_ins_rt(rt, &info);
779 }
780
781 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
782                                       const struct in6_addr *daddr,
783                                       const struct in6_addr *saddr)
784 {
785         struct rt6_info *rt;
786
787         /*
788          *      Clone the route.
789          */
790
791         rt = ip6_rt_copy(ort, daddr);
792
793         if (rt) {
794                 int attempts = !in_softirq();
795
796                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
797                         if (ort->rt6i_dst.plen != 128 &&
798                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
799                                 rt->rt6i_flags |= RTF_ANYCAST;
800                         rt->rt6i_gateway = *daddr;
801                 }
802
803                 rt->rt6i_flags |= RTF_CACHE;
804
805 #ifdef CONFIG_IPV6_SUBTREES
806                 if (rt->rt6i_src.plen && saddr) {
807                         rt->rt6i_src.addr = *saddr;
808                         rt->rt6i_src.plen = 128;
809                 }
810 #endif
811
812         retry:
813                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
814                         struct net *net = dev_net(rt->dst.dev);
815                         int saved_rt_min_interval =
816                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
817                         int saved_rt_elasticity =
818                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
819
820                         if (attempts-- > 0) {
821                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
822                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
823
824                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
825
826                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
827                                         saved_rt_elasticity;
828                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
829                                         saved_rt_min_interval;
830                                 goto retry;
831                         }
832
833                         net_warn_ratelimited("Neighbour table overflow\n");
834                         dst_free(&rt->dst);
835                         return NULL;
836                 }
837         }
838
839         return rt;
840 }
841
842 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
843                                         const struct in6_addr *daddr)
844 {
845         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
846
847         if (rt) {
848                 rt->rt6i_flags |= RTF_CACHE;
849                 rt->n = neigh_clone(ort->n);
850         }
851         return rt;
852 }
853
/*
 * Per-table route resolution for the flow @fl6 with interface @oif.
 *
 * Outline, as implemented below:
 *  - Under table->tb6_lock (read side) the node for the flow is found
 *    with fib6_lookup() and a route is chosen with rt6_select().
 *    BACKTRACK() may move `fn` up the tree and jump to `restart`, or
 *    jump to `out`.
 *  - `reachable` starts as RT6_LOOKUP_F_REACHABLE unless forwarding is
 *    enabled in devconf_all.  If the lookup reaches `out` while it is
 *    still set, it is cleared and the lookup is repeated from
 *    `restart_2` without that restriction.
 *  - The null entry and RTF_CACHE routes are returned directly (via
 *    `out`).  Other routes are copied after dropping the lock:
 *    rt6_alloc_cow() when the route has no neighbour and is not
 *    RTF_NONEXTHOP, rt6_alloc_clone() when it is not DST_HOST; routes
 *    matching neither case are returned as they are.
 *  - The copy is inserted with ip6_ins_rt().  If the copy or the
 *    insertion fails, the whole lookup is retried from `relookup`
 *    until `attempts` is used up.
 *
 * Every return path passes through `out2` with a dst_hold() taken on
 * the returned route; lastuse and __use are updated there.
 */
854 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
855                                       struct flowi6 *fl6, int flags)
856 {
857         struct fib6_node *fn;
858         struct rt6_info *rt, *nrt;
859         int strict = 0;
860         int attempts = 3;
861         int err;
862         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
863
864         strict |= flags & RT6_LOOKUP_F_IFACE;
865
866 relookup:
867         read_lock_bh(&table->tb6_lock);
868
869 restart_2:
870         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
871
872 restart:
873         rt = rt6_select(fn, oif, strict | reachable);
874
        /* BACKTRACK() may jump to `restart` or `out`. */
875         BACKTRACK(net, &fl6->saddr);
876         if (rt == net->ipv6.ip6_null_entry ||
877             rt->rt6i_flags & RTF_CACHE)
878                 goto out;
879
        /* Hold rt across the unlocked section in which the copy is made. */
880         dst_hold(&rt->dst);
881         read_unlock_bh(&table->tb6_lock);
882
883         if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
884                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
885         else if (!(rt->dst.flags & DST_HOST))
886                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
887         else
888                 goto out2;
889
        /* Switch from the original route to the copy (or the null entry). */
890         dst_release(&rt->dst);
891         rt = nrt ? : net->ipv6.ip6_null_entry;
892
893         dst_hold(&rt->dst);
894         if (nrt) {
895                 err = ip6_ins_rt(nrt);
896                 if (!err)
897                         goto out2;
898         }
899
        /* Out of retries: return whatever rt currently is, still held. */
900         if (--attempts <= 0)
901                 goto out2;
902
903         /*
904          * Race condition! In the gap, when table->tb6_lock was
905          * released someone could insert this route.  Relookup.
906          */
907         dst_release(&rt->dst);
908         goto relookup;
909
910 out:
        /* First pass was restricted to reachable routers: retry without. */
911         if (reachable) {
912                 reachable = 0;
913                 goto restart_2;
914         }
915         dst_hold(&rt->dst);
916         read_unlock_bh(&table->tb6_lock);
917 out2:
918         rt->dst.lastuse = jiffies;
919         rt->dst.__use++;
920
921         return rt;
922 }
923
924 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
925                                             struct flowi6 *fl6, int flags)
926 {
927         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
928 }
929
930 static struct dst_entry *ip6_route_input_lookup(struct net *net,
931                                                 struct net_device *dev,
932                                                 struct flowi6 *fl6, int flags)
933 {
934         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
935                 flags |= RT6_LOOKUP_F_IFACE;
936
937         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
938 }
939
940 void ip6_route_input(struct sk_buff *skb)
941 {
942         const struct ipv6hdr *iph = ipv6_hdr(skb);
943         struct net *net = dev_net(skb->dev);
944         int flags = RT6_LOOKUP_F_HAS_SADDR;
945         struct flowi6 fl6 = {
946                 .flowi6_iif = skb->dev->ifindex,
947                 .daddr = iph->daddr,
948                 .saddr = iph->saddr,
949                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
950                 .flowi6_mark = skb->mark,
951                 .flowi6_proto = iph->nexthdr,
952         };
953
954         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
955 }
956
957 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
958                                              struct flowi6 *fl6, int flags)
959 {
960         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
961 }
962
963 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
964                                     struct flowi6 *fl6)
965 {
966         int flags = 0;
967
968         fl6->flowi6_iif = LOOPBACK_IFINDEX;
969
970         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
971                 flags |= RT6_LOOKUP_F_IFACE;
972
973         if (!ipv6_addr_any(&fl6->saddr))
974                 flags |= RT6_LOOKUP_F_HAS_SADDR;
975         else if (sk)
976                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
977
978         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
979 }
980
981 EXPORT_SYMBOL(ip6_route_output);
982
983 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
984 {
985         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
986         struct dst_entry *new = NULL;
987
988         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
989         if (rt) {
990                 new = &rt->dst;
991
992                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
993                 rt6_init_peer(rt, net->ipv6.peers);
994
995                 new->__use = 1;
996                 new->input = dst_discard;
997                 new->output = dst_discard;
998
999                 if (dst_metrics_read_only(&ort->dst))
1000                         new->_metrics = ort->dst._metrics;
1001                 else
1002                         dst_copy_metrics(new, &ort->dst);
1003                 rt->rt6i_idev = ort->rt6i_idev;
1004                 if (rt->rt6i_idev)
1005                         in6_dev_hold(rt->rt6i_idev);
1006
1007                 rt->rt6i_gateway = ort->rt6i_gateway;
1008                 rt->rt6i_flags = ort->rt6i_flags;
1009                 rt6_clean_expires(rt);
1010                 rt->rt6i_metric = 0;
1011
1012                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1013 #ifdef CONFIG_IPV6_SUBTREES
1014                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1015 #endif
1016
1017                 dst_free(new);
1018         }
1019
1020         dst_release(dst_orig);
1021         return new ? new : ERR_PTR(-ENOMEM);
1022 }
1023
1024 /*
1025  *      Destination cache support functions
1026  */
1027
1028 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1029 {
1030         struct rt6_info *rt;
1031
1032         rt = (struct rt6_info *) dst;
1033
1034         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1035                 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1036                         if (!rt6_has_peer(rt))
1037                                 rt6_bind_peer(rt, 0);
1038                         rt->rt6i_peer_genid = rt6_peer_genid();
1039                 }
1040                 return dst;
1041         }
1042         return NULL;
1043 }
1044
1045 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1046 {
1047         struct rt6_info *rt = (struct rt6_info *) dst;
1048
1049         if (rt) {
1050                 if (rt->rt6i_flags & RTF_CACHE) {
1051                         if (rt6_check_expired(rt)) {
1052                                 ip6_del_rt(rt);
1053                                 dst = NULL;
1054                         }
1055                 } else {
1056                         dst_release(dst);
1057                         dst = NULL;
1058                 }
1059         }
1060         return dst;
1061 }
1062
1063 static void ip6_link_failure(struct sk_buff *skb)
1064 {
1065         struct rt6_info *rt;
1066
1067         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1068
1069         rt = (struct rt6_info *) skb_dst(skb);
1070         if (rt) {
1071                 if (rt->rt6i_flags & RTF_CACHE)
1072                         rt6_update_expires(rt, 0);
1073                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1074                         rt->rt6i_node->fn_sernum = -1;
1075         }
1076 }
1077
1078 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1079                                struct sk_buff *skb, u32 mtu)
1080 {
1081         struct rt6_info *rt6 = (struct rt6_info*)dst;
1082
1083         dst_confirm(dst);
1084         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1085                 struct net *net = dev_net(dst->dev);
1086
1087                 rt6->rt6i_flags |= RTF_MODIFIED;
1088                 if (mtu < IPV6_MIN_MTU) {
1089                         u32 features = dst_metric(dst, RTAX_FEATURES);
1090                         mtu = IPV6_MIN_MTU;
1091                         features |= RTAX_FEATURE_ALLFRAG;
1092                         dst_metric_set(dst, RTAX_FEATURES, features);
1093                 }
1094                 dst_metric_set(dst, RTAX_MTU, mtu);
1095                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1096         }
1097 }
1098
1099 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1100                      int oif, u32 mark)
1101 {
1102         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1103         struct dst_entry *dst;
1104         struct flowi6 fl6;
1105
1106         memset(&fl6, 0, sizeof(fl6));
1107         fl6.flowi6_oif = oif;
1108         fl6.flowi6_mark = mark;
1109         fl6.flowi6_flags = 0;
1110         fl6.daddr = iph->daddr;
1111         fl6.saddr = iph->saddr;
1112         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1113
1114         dst = ip6_route_output(net, NULL, &fl6);
1115         if (!dst->error)
1116                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1117         dst_release(dst);
1118 }
1119 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1120
1121 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1122 {
1123         ip6_update_pmtu(skb, sock_net(sk), mtu,
1124                         sk->sk_bound_dev_if, sk->sk_mark);
1125 }
1126 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1127
1128 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1129 {
1130         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1131         struct dst_entry *dst;
1132         struct flowi6 fl6;
1133
1134         memset(&fl6, 0, sizeof(fl6));
1135         fl6.flowi6_oif = oif;
1136         fl6.flowi6_mark = mark;
1137         fl6.flowi6_flags = 0;
1138         fl6.daddr = iph->daddr;
1139         fl6.saddr = iph->saddr;
1140         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1141
1142         dst = ip6_route_output(net, NULL, &fl6);
1143         if (!dst->error)
1144                 rt6_do_redirect(dst, NULL, skb);
1145         dst_release(dst);
1146 }
1147 EXPORT_SYMBOL_GPL(ip6_redirect);
1148
1149 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1150 {
1151         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1152 }
1153 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1154
1155 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1156 {
1157         struct net_device *dev = dst->dev;
1158         unsigned int mtu = dst_mtu(dst);
1159         struct net *net = dev_net(dev);
1160
1161         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1162
1163         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1164                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1165
1166         /*
1167          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1168          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1169          * IPV6_MAXPLEN is also valid and means: "any MSS,
1170          * rely only on pmtu discovery"
1171          */
1172         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1173                 mtu = IPV6_MAXPLEN;
1174         return mtu;
1175 }
1176
1177 static unsigned int ip6_mtu(const struct dst_entry *dst)
1178 {
1179         struct inet6_dev *idev;
1180         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1181
1182         if (mtu)
1183                 return mtu;
1184
1185         mtu = IPV6_MIN_MTU;
1186
1187         rcu_read_lock();
1188         idev = __in6_dev_get(dst->dev);
1189         if (idev)
1190                 mtu = idev->cnf.mtu6;
1191         rcu_read_unlock();
1192
1193         return mtu;
1194 }
1195
/*
 * Head of a singly linked list (chained through dst->next) of the dst
 * entries created by icmp6_dst_alloc().  The users in this part of the
 * file -- icmp6_dst_alloc(), icmp6_dst_gc() and icmp6_clean_all() -- all
 * touch the list with icmp6_dst_lock held (spin_lock_bh).
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1198
1199 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1200                                   struct neighbour *neigh,
1201                                   struct flowi6 *fl6)
1202 {
1203         struct dst_entry *dst;
1204         struct rt6_info *rt;
1205         struct inet6_dev *idev = in6_dev_get(dev);
1206         struct net *net = dev_net(dev);
1207
1208         if (unlikely(!idev))
1209                 return ERR_PTR(-ENODEV);
1210
1211         rt = ip6_dst_alloc(net, dev, 0, NULL);
1212         if (unlikely(!rt)) {
1213                 in6_dev_put(idev);
1214                 dst = ERR_PTR(-ENOMEM);
1215                 goto out;
1216         }
1217
1218         if (neigh)
1219                 neigh_hold(neigh);
1220         else {
1221                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1222                 if (IS_ERR(neigh)) {
1223                         in6_dev_put(idev);
1224                         dst_free(&rt->dst);
1225                         return ERR_CAST(neigh);
1226                 }
1227         }
1228
1229         rt->dst.flags |= DST_HOST;
1230         rt->dst.output  = ip6_output;
1231         rt->n = neigh;
1232         atomic_set(&rt->dst.__refcnt, 1);
1233         rt->rt6i_dst.addr = fl6->daddr;
1234         rt->rt6i_dst.plen = 128;
1235         rt->rt6i_idev     = idev;
1236         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1237
1238         spin_lock_bh(&icmp6_dst_lock);
1239         rt->dst.next = icmp6_dst_gc_list;
1240         icmp6_dst_gc_list = &rt->dst;
1241         spin_unlock_bh(&icmp6_dst_lock);
1242
1243         fib6_force_start_gc(net);
1244
1245         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1246
1247 out:
1248         return dst;
1249 }
1250
1251 int icmp6_dst_gc(void)
1252 {
1253         struct dst_entry *dst, **pprev;
1254         int more = 0;
1255
1256         spin_lock_bh(&icmp6_dst_lock);
1257         pprev = &icmp6_dst_gc_list;
1258
1259         while ((dst = *pprev) != NULL) {
1260                 if (!atomic_read(&dst->__refcnt)) {
1261                         *pprev = dst->next;
1262                         dst_free(dst);
1263                 } else {
1264                         pprev = &dst->next;
1265                         ++more;
1266                 }
1267         }
1268
1269         spin_unlock_bh(&icmp6_dst_lock);
1270
1271         return more;
1272 }
1273
1274 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1275                             void *arg)
1276 {
1277         struct dst_entry *dst, **pprev;
1278
1279         spin_lock_bh(&icmp6_dst_lock);
1280         pprev = &icmp6_dst_gc_list;
1281         while ((dst = *pprev) != NULL) {
1282                 struct rt6_info *rt = (struct rt6_info *) dst;
1283                 if (func(rt, arg)) {
1284                         *pprev = dst->next;
1285                         dst_free(dst);
1286                 } else {
1287                         pprev = &dst->next;
1288                 }
1289         }
1290         spin_unlock_bh(&icmp6_dst_lock);
1291 }
1292
1293 static int ip6_dst_gc(struct dst_ops *ops)
1294 {
1295         unsigned long now = jiffies;
1296         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1297         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1298         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1299         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1300         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1301         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1302         int entries;
1303
1304         entries = dst_entries_get_fast(ops);
1305         if (time_after(rt_last_gc + rt_min_interval, now) &&
1306             entries <= rt_max_size)
1307                 goto out;
1308
1309         net->ipv6.ip6_rt_gc_expire++;
1310         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1311         net->ipv6.ip6_rt_last_gc = now;
1312         entries = dst_entries_get_slow(ops);
1313         if (entries < ops->gc_thresh)
1314                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1315 out:
1316         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1317         return entries > rt_max_size;
1318 }
1319
1320 /* Clean host part of a prefix. Not necessary in radix tree,
1321    but results in cleaner routing tables.
1322
1323    Remove it only when all the things will work!
1324  */
1325
1326 int ip6_dst_hoplimit(struct dst_entry *dst)
1327 {
1328         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1329         if (hoplimit == 0) {
1330                 struct net_device *dev = dst->dev;
1331                 struct inet6_dev *idev;
1332
1333                 rcu_read_lock();
1334                 idev = __in6_dev_get(dev);
1335                 if (idev)
1336                         hoplimit = idev->cnf.hop_limit;
1337                 else
1338                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1339                 rcu_read_unlock();
1340         }
1341         return hoplimit;
1342 }
1343 EXPORT_SYMBOL(ip6_dst_hoplimit);
1344
1345 /*
1346  *
1347  */
1348
/*
 * Build the route described by @cfg and insert it into its FIB table.
 *
 * Returns the result of __ip6_ins_rt() once the route has been fully
 * built.  On a validation or allocation failure a negative errno is
 * returned (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH, or the
 * error reported by rt6_bind_neighbour()); in that case the device and
 * inet6_dev references taken here are dropped and the partially built
 * route is freed.
 *
 * Side effects on @cfg: fc_metric and fc_protocol are replaced by defaults
 * when unset, and fc_nlinfo.nl_net is overwritten with the namespace of
 * the route's device.
 *
 * Throughout the function err is preset before a group of checks so that
 * a bare "goto out" reports the right code.
 */
int ip6_route_add(struct fib6_config *cfg)
{
        int err;
        struct net *net = cfg->fc_nlinfo.nl_net;
        struct rt6_info *rt = NULL;
        struct net_device *dev = NULL;
        struct inet6_dev *idev = NULL;
        struct fib6_table *table;
        int addr_type;

        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
                return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
        /* A source prefix is only accepted when subtrees are compiled in. */
        if (cfg->fc_src_len)
                return -EINVAL;
#endif
        /* An explicit interface must exist and have an inet6_dev. */
        if (cfg->fc_ifindex) {
                err = -ENODEV;
                dev = dev_get_by_index(net, cfg->fc_ifindex);
                if (!dev)
                        goto out;
                idev = in6_dev_get(dev);
                if (!idev)
                        goto out;
        }

        if (cfg->fc_metric == 0)
                cfg->fc_metric = IP6_RT_PRIO_USER;

        /*
         * A netlink request without NLM_F_CREATE first looks for an
         * existing table; a missing one is still created, with a warning.
         * Every other caller gets the table created on demand.
         */
        err = -ENOBUFS;
        if (cfg->fc_nlinfo.nlh &&
            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
                table = fib6_get_table(net, cfg->fc_table);
                if (!table) {
                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
                        table = fib6_new_table(net, cfg->fc_table);
                }
        } else {
                table = fib6_new_table(net, cfg->fc_table);
        }

        if (!table)
                goto out;

        rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);

        if (!rt) {
                err = -ENOMEM;
                goto out;
        }

        rt->dst.obsolete = -1;

        if (cfg->fc_flags & RTF_EXPIRES)
                rt6_set_expires(rt, jiffies +
                                clock_t_to_jiffies(cfg->fc_expires));
        else
                rt6_clean_expires(rt);

        if (cfg->fc_protocol == RTPROT_UNSPEC)
                cfg->fc_protocol = RTPROT_BOOT;
        rt->rt6i_protocol = cfg->fc_protocol;

        addr_type = ipv6_addr_type(&cfg->fc_dst);

        /* Input handler depends on the kind of destination. */
        if (addr_type & IPV6_ADDR_MULTICAST)
                rt->dst.input = ip6_mc_input;
        else if (cfg->fc_flags & RTF_LOCAL)
                rt->dst.input = ip6_input;
        else
                rt->dst.input = ip6_forward;

        rt->dst.output = ip6_output;

        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
        rt->rt6i_dst.plen = cfg->fc_dst_len;
        if (rt->rt6i_dst.plen == 128)
               rt->dst.flags |= DST_HOST;

        /*
         * A non-host route that comes with metric attributes gets its own
         * zeroed metrics array; the values are filled in at install_route.
         */
        if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
                u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
                if (!metrics) {
                        err = -ENOMEM;
                        goto out;
                }
                dst_init_metrics(&rt->dst, metrics, 0);
        }
#ifdef CONFIG_IPV6_SUBTREES
        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
        rt->rt6i_src.plen = cfg->fc_src_len;
#endif

        rt->rt6i_metric = cfg->fc_metric;

        /* We cannot add true routes via loopback here,
           they would result in kernel looping; promote them to reject routes
         */
        if ((cfg->fc_flags & RTF_REJECT) ||
            (dev && (dev->flags & IFF_LOOPBACK) &&
             !(addr_type & IPV6_ADDR_LOOPBACK) &&
             !(cfg->fc_flags & RTF_LOCAL))) {
                /* hold loopback dev/idev if we haven't done so. */
                if (dev != net->loopback_dev) {
                        if (dev) {
                                dev_put(dev);
                                in6_dev_put(idev);
                        }
                        dev = net->loopback_dev;
                        dev_hold(dev);
                        idev = in6_dev_get(dev);
                        if (!idev) {
                                err = -ENODEV;
                                goto out;
                        }
                }
                rt->dst.output = ip6_pkt_discard_out;
                rt->dst.input = ip6_pkt_discard;
                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                /* The route type selects the error stored in the dst. */
                switch (cfg->fc_type) {
                case RTN_BLACKHOLE:
                        rt->dst.error = -EINVAL;
                        break;
                case RTN_PROHIBIT:
                        rt->dst.error = -EACCES;
                        break;
                case RTN_THROW:
                        rt->dst.error = -EAGAIN;
                        break;
                default:
                        rt->dst.error = -ENETUNREACH;
                        break;
                }
                goto install_route;
        }

        if (cfg->fc_flags & RTF_GATEWAY) {
                const struct in6_addr *gw_addr;
                int gwa_type;

                gw_addr = &cfg->fc_gateway;
                rt->rt6i_gateway = *gw_addr;
                gwa_type = ipv6_addr_type(gw_addr);

                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
                        struct rt6_info *grt;

                        /* IPv6 strictly forbids using addresses that are
                           not link-local as the nexthop address.
                           Otherwise, the router will not be able to send
                           redirects.
                           That is very good, but in some (rare!)
                           circumstances (SIT, PtP, NBMA NOARP links) it is
                           handy to allow some exceptions. --ANK
                         */
                        err = -EINVAL;
                        if (!(gwa_type & IPV6_ADDR_UNICAST))
                                goto out;

                        /* The gateway itself must be routable. */
                        grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

                        err = -EHOSTUNREACH;
                        if (!grt)
                                goto out;
                        if (dev) {
                                /* ...and via the interface that was asked for. */
                                if (dev != grt->dst.dev) {
                                        dst_release(&grt->dst);
                                        goto out;
                                }
                        } else {
                                /* No interface given: adopt the gateway route's. */
                                dev = grt->dst.dev;
                                idev = grt->rt6i_idev;
                                dev_hold(dev);
                                in6_dev_hold(grt->rt6i_idev);
                        }
                        /*
                         * Only a route to the gateway that is not itself a
                         * gateway route clears the -EHOSTUNREACH preset.
                         */
                        if (!(grt->rt6i_flags & RTF_GATEWAY))
                                err = 0;
                        dst_release(&grt->dst);

                        if (err)
                                goto out;
                }
                err = -EINVAL;
                if (!dev || (dev->flags & IFF_LOOPBACK))
                        goto out;
        }

        err = -ENODEV;
        if (!dev)
                goto out;

        /* A preferred source address is validated with ipv6_chk_addr() against dev. */
        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
                        err = -EINVAL;
                        goto out;
                }
                rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
                rt->rt6i_prefsrc.plen = 128;
        } else
                rt->rt6i_prefsrc.plen = 0;

        if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
                err = rt6_bind_neighbour(rt, dev);
                if (err)
                        goto out;
        }

        rt->rt6i_flags = cfg->fc_flags;

install_route:
        /*
         * Copy the user supplied metrics.  Attribute type 0 is skipped and
         * a type above RTAX_MAX rejects the whole request.
         */
        if (cfg->fc_mx) {
                struct nlattr *nla;
                int remaining;

                nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
                        int type = nla_type(nla);

                        if (type) {
                                if (type > RTAX_MAX) {
                                        err = -EINVAL;
                                        goto out;
                                }

                                dst_metric_set(&rt->dst, type, nla_get_u32(nla));
                        }
                }
        }

        /*
         * The dev/idev pointers held above are stored in the route and not
         * released on this path; presumably the route now owns those
         * references.
         */
        rt->dst.dev = dev;
        rt->rt6i_idev = idev;
        rt->rt6i_table = table;

        cfg->fc_nlinfo.nl_net = dev_net(dev);

        return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
        /* Failure: undo every reference taken so far. */
        if (dev)
                dev_put(dev);
        if (idev)
                in6_dev_put(idev);
        if (rt)
                dst_free(&rt->dst);
        return err;
}
1592
1593 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1594 {
1595         int err;
1596         struct fib6_table *table;
1597         struct net *net = dev_net(rt->dst.dev);
1598
1599         if (rt == net->ipv6.ip6_null_entry)
1600                 return -ENOENT;
1601
1602         table = rt->rt6i_table;
1603         write_lock_bh(&table->tb6_lock);
1604
1605         err = fib6_del(rt, info);
1606         dst_release(&rt->dst);
1607
1608         write_unlock_bh(&table->tb6_lock);
1609
1610         return err;
1611 }
1612
1613 int ip6_del_rt(struct rt6_info *rt)
1614 {
1615         struct nl_info info = {
1616                 .nl_net = dev_net(rt->dst.dev),
1617         };
1618         return __ip6_del_rt(rt, &info);
1619 }
1620
1621 static int ip6_route_del(struct fib6_config *cfg)
1622 {
1623         struct fib6_table *table;
1624         struct fib6_node *fn;
1625         struct rt6_info *rt;
1626         int err = -ESRCH;
1627
1628         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1629         if (!table)
1630                 return err;
1631
1632         read_lock_bh(&table->tb6_lock);
1633
1634         fn = fib6_locate(&table->tb6_root,
1635                          &cfg->fc_dst, cfg->fc_dst_len,
1636                          &cfg->fc_src, cfg->fc_src_len);
1637
1638         if (fn) {
1639                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1640                         if (cfg->fc_ifindex &&
1641                             (!rt->dst.dev ||
1642                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1643                                 continue;
1644                         if (cfg->fc_flags & RTF_GATEWAY &&
1645                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1646                                 continue;
1647                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1648                                 continue;
1649                         dst_hold(&rt->dst);
1650                         read_unlock_bh(&table->tb6_lock);
1651
1652                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1653                 }
1654         }
1655         read_unlock_bh(&table->tb6_lock);
1656
1657         return err;
1658 }
1659
1660 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1661 {
1662         struct net *net = dev_net(skb->dev);
1663         struct netevent_redirect netevent;
1664         struct rt6_info *rt, *nrt = NULL;
1665         const struct in6_addr *target;
1666         struct ndisc_options ndopts;
1667         const struct in6_addr *dest;
1668         struct neighbour *old_neigh;
1669         struct inet6_dev *in6_dev;
1670         struct neighbour *neigh;
1671         struct icmp6hdr *icmph;
1672         int optlen, on_link;
1673         u8 *lladdr;
1674
1675         optlen = skb->tail - skb->transport_header;
1676         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1677
1678         if (optlen < 0) {
1679                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1680                 return;
1681         }
1682
1683         icmph = icmp6_hdr(skb);
1684         target = (const struct in6_addr *) (icmph + 1);
1685         dest = target + 1;
1686
1687         if (ipv6_addr_is_multicast(dest)) {
1688                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1689                 return;
1690         }
1691
1692         on_link = 0;
1693         if (ipv6_addr_equal(dest, target)) {
1694                 on_link = 1;
1695         } else if (ipv6_addr_type(target) !=
1696                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1697                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1698                 return;
1699         }
1700
1701         in6_dev = __in6_dev_get(skb->dev);
1702         if (!in6_dev)
1703                 return;
1704         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1705                 return;
1706
1707         /* RFC2461 8.1:
1708          *      The IP source address of the Redirect MUST be the same as the current
1709          *      first-hop router for the specified ICMP Destination Address.
1710          */
1711
1712         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1713                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1714                 return;
1715         }
1716
1717         lladdr = NULL;
1718         if (ndopts.nd_opts_tgt_lladdr) {
1719                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1720                                              skb->dev);
1721                 if (!lladdr) {
1722                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1723                         return;
1724                 }
1725         }
1726
1727         rt = (struct rt6_info *) dst;
1728         if (rt == net->ipv6.ip6_null_entry) {
1729                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1730                 return;
1731         }
1732
1733         /* Redirect received -> path was valid.
1734          * Look, redirects are sent only in response to data packets,
1735          * so that this nexthop apparently is reachable. --ANK
1736          */
1737         dst_confirm(&rt->dst);
1738
1739         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1740         if (!neigh)
1741                 return;
1742
1743         /* Duplicate redirect: silently ignore. */
1744         old_neigh = rt->n;
1745         if (neigh == old_neigh)
1746                 goto out;
1747
1748         /*
1749          *      We have finally decided to accept it.
1750          */
1751
1752         neigh_update(neigh, lladdr, NUD_STALE,
1753                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1754                      NEIGH_UPDATE_F_OVERRIDE|
1755                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1756                                      NEIGH_UPDATE_F_ISROUTER))
1757                      );
1758
1759         nrt = ip6_rt_copy(rt, dest);
1760         if (!nrt)
1761                 goto out;
1762
1763         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1764         if (on_link)
1765                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1766
1767         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1768         nrt->n = neigh_clone(neigh);
1769
1770         if (ip6_ins_rt(nrt))
1771                 goto out;
1772
1773         netevent.old = &rt->dst;
1774         netevent.old_neigh = old_neigh;
1775         netevent.new = &nrt->dst;
1776         netevent.new_neigh = neigh;
1777         netevent.daddr = dest;
1778         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1779
1780         if (rt->rt6i_flags & RTF_CACHE) {
1781                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1782                 ip6_del_rt(rt);
1783         }
1784
1785 out:
1786         neigh_release(neigh);
1787 }
1788
1789 /*
1790  *      Misc support functions
1791  */
1792
1793 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1794                                     const struct in6_addr *dest)
1795 {
1796         struct net *net = dev_net(ort->dst.dev);
1797         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1798                                             ort->rt6i_table);
1799
1800         if (rt) {
1801                 rt->dst.input = ort->dst.input;
1802                 rt->dst.output = ort->dst.output;
1803                 rt->dst.flags |= DST_HOST;
1804
1805                 rt->rt6i_dst.addr = *dest;
1806                 rt->rt6i_dst.plen = 128;
1807                 dst_copy_metrics(&rt->dst, &ort->dst);
1808                 rt->dst.error = ort->dst.error;
1809                 rt->rt6i_idev = ort->rt6i_idev;
1810                 if (rt->rt6i_idev)
1811                         in6_dev_hold(rt->rt6i_idev);
1812                 rt->dst.lastuse = jiffies;
1813
1814                 rt->rt6i_gateway = ort->rt6i_gateway;
1815                 rt->rt6i_flags = ort->rt6i_flags;
1816                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1817                     (RTF_DEFAULT | RTF_ADDRCONF))
1818                         rt6_set_from(rt, ort);
1819                 else
1820                         rt6_clean_expires(rt);
1821                 rt->rt6i_metric = 0;
1822
1823 #ifdef CONFIG_IPV6_SUBTREES
1824                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1825 #endif
1826                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1827                 rt->rt6i_table = ort->rt6i_table;
1828         }
1829         return rt;
1830 }
1831
1832 #ifdef CONFIG_IPV6_ROUTE_INFO
1833 static struct rt6_info *rt6_get_route_info(struct net *net,
1834                                            const struct in6_addr *prefix, int prefixlen,
1835                                            const struct in6_addr *gwaddr, int ifindex)
1836 {
1837         struct fib6_node *fn;
1838         struct rt6_info *rt = NULL;
1839         struct fib6_table *table;
1840
1841         table = fib6_get_table(net, RT6_TABLE_INFO);
1842         if (!table)
1843                 return NULL;
1844
1845         write_lock_bh(&table->tb6_lock);
1846         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1847         if (!fn)
1848                 goto out;
1849
1850         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1851                 if (rt->dst.dev->ifindex != ifindex)
1852                         continue;
1853                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1854                         continue;
1855                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1856                         continue;
1857                 dst_hold(&rt->dst);
1858                 break;
1859         }
1860 out:
1861         write_unlock_bh(&table->tb6_lock);
1862         return rt;
1863 }
1864
1865 static struct rt6_info *rt6_add_route_info(struct net *net,
1866                                            const struct in6_addr *prefix, int prefixlen,
1867                                            const struct in6_addr *gwaddr, int ifindex,
1868                                            unsigned int pref)
1869 {
1870         struct fib6_config cfg = {
1871                 .fc_table       = RT6_TABLE_INFO,
1872                 .fc_metric      = IP6_RT_PRIO_USER,
1873                 .fc_ifindex     = ifindex,
1874                 .fc_dst_len     = prefixlen,
1875                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1876                                   RTF_UP | RTF_PREF(pref),
1877                 .fc_nlinfo.portid = 0,
1878                 .fc_nlinfo.nlh = NULL,
1879                 .fc_nlinfo.nl_net = net,
1880         };
1881
1882         cfg.fc_dst = *prefix;
1883         cfg.fc_gateway = *gwaddr;
1884
1885         /* We should treat it as a default route if prefix length is 0. */
1886         if (!prefixlen)
1887                 cfg.fc_flags |= RTF_DEFAULT;
1888
1889         ip6_route_add(&cfg);
1890
1891         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1892 }
1893 #endif
1894
1895 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1896 {
1897         struct rt6_info *rt;
1898         struct fib6_table *table;
1899
1900         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1901         if (!table)
1902                 return NULL;
1903
1904         write_lock_bh(&table->tb6_lock);
1905         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1906                 if (dev == rt->dst.dev &&
1907                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1908                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1909                         break;
1910         }
1911         if (rt)
1912                 dst_hold(&rt->dst);
1913         write_unlock_bh(&table->tb6_lock);
1914         return rt;
1915 }
1916
1917 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1918                                      struct net_device *dev,
1919                                      unsigned int pref)
1920 {
1921         struct fib6_config cfg = {
1922                 .fc_table       = RT6_TABLE_DFLT,
1923                 .fc_metric      = IP6_RT_PRIO_USER,
1924                 .fc_ifindex     = dev->ifindex,
1925                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1926                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1927                 .fc_nlinfo.portid = 0,
1928                 .fc_nlinfo.nlh = NULL,
1929                 .fc_nlinfo.nl_net = dev_net(dev),
1930         };
1931
1932         cfg.fc_gateway = *gwaddr;
1933
1934         ip6_route_add(&cfg);
1935
1936         return rt6_get_dflt_router(gwaddr, dev);
1937 }
1938
1939 void rt6_purge_dflt_routers(struct net *net)
1940 {
1941         struct rt6_info *rt;
1942         struct fib6_table *table;
1943
1944         /* NOTE: Keep consistent with rt6_get_dflt_router */
1945         table = fib6_get_table(net, RT6_TABLE_DFLT);
1946         if (!table)
1947                 return;
1948
1949 restart:
1950         read_lock_bh(&table->tb6_lock);
1951         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1952                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1953                         dst_hold(&rt->dst);
1954                         read_unlock_bh(&table->tb6_lock);
1955                         ip6_del_rt(rt);
1956                         goto restart;
1957                 }
1958         }
1959         read_unlock_bh(&table->tb6_lock);
1960 }
1961
1962 static void rtmsg_to_fib6_config(struct net *net,
1963                                  struct in6_rtmsg *rtmsg,
1964                                  struct fib6_config *cfg)
1965 {
1966         memset(cfg, 0, sizeof(*cfg));
1967
1968         cfg->fc_table = RT6_TABLE_MAIN;
1969         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1970         cfg->fc_metric = rtmsg->rtmsg_metric;
1971         cfg->fc_expires = rtmsg->rtmsg_info;
1972         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1973         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1974         cfg->fc_flags = rtmsg->rtmsg_flags;
1975
1976         cfg->fc_nlinfo.nl_net = net;
1977
1978         cfg->fc_dst = rtmsg->rtmsg_dst;
1979         cfg->fc_src = rtmsg->rtmsg_src;
1980         cfg->fc_gateway = rtmsg->rtmsg_gateway;
1981 }
1982
1983 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1984 {
1985         struct fib6_config cfg;
1986         struct in6_rtmsg rtmsg;
1987         int err;
1988
1989         switch(cmd) {
1990         case SIOCADDRT:         /* Add a route */
1991         case SIOCDELRT:         /* Delete a route */
1992                 if (!capable(CAP_NET_ADMIN))
1993                         return -EPERM;
1994                 err = copy_from_user(&rtmsg, arg,
1995                                      sizeof(struct in6_rtmsg));
1996                 if (err)
1997                         return -EFAULT;
1998
1999                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2000
2001                 rtnl_lock();
2002                 switch (cmd) {
2003                 case SIOCADDRT:
2004                         err = ip6_route_add(&cfg);
2005                         break;
2006                 case SIOCDELRT:
2007                         err = ip6_route_del(&cfg);
2008                         break;
2009                 default:
2010                         err = -EINVAL;
2011                 }
2012                 rtnl_unlock();
2013
2014                 return err;
2015         }
2016
2017         return -EINVAL;
2018 }
2019
2020 /*
2021  *      Drop the packet on the floor
2022  */
2023
2024 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2025 {
2026         int type;
2027         struct dst_entry *dst = skb_dst(skb);
2028         switch (ipstats_mib_noroutes) {
2029         case IPSTATS_MIB_INNOROUTES:
2030                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2031                 if (type == IPV6_ADDR_ANY) {
2032                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2033                                       IPSTATS_MIB_INADDRERRORS);
2034                         break;
2035                 }
2036                 /* FALLTHROUGH */
2037         case IPSTATS_MIB_OUTNOROUTES:
2038                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2039                               ipstats_mib_noroutes);
2040                 break;
2041         }
2042         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2043         kfree_skb(skb);
2044         return 0;
2045 }
2046
2047 static int ip6_pkt_discard(struct sk_buff *skb)
2048 {
2049         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2050 }
2051
2052 static int ip6_pkt_discard_out(struct sk_buff *skb)
2053 {
2054         skb->dev = skb_dst(skb)->dev;
2055         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2056 }
2057
2058 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2059
2060 static int ip6_pkt_prohibit(struct sk_buff *skb)
2061 {
2062         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2063 }
2064
2065 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2066 {
2067         skb->dev = skb_dst(skb)->dev;
2068         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2069 }
2070
2071 #endif
2072
2073 /*
2074  *      Allocate a dst for local (unicast / anycast) address.
2075  */
2076
2077 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2078                                     const struct in6_addr *addr,
2079                                     bool anycast)
2080 {
2081         struct net *net = dev_net(idev->dev);
2082         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2083         int err;
2084
2085         if (!rt) {
2086                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2087                 return ERR_PTR(-ENOMEM);
2088         }
2089
2090         in6_dev_hold(idev);
2091
2092         rt->dst.flags |= DST_HOST;
2093         rt->dst.input = ip6_input;
2094         rt->dst.output = ip6_output;
2095         rt->rt6i_idev = idev;
2096         rt->dst.obsolete = -1;
2097
2098         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2099         if (anycast)
2100                 rt->rt6i_flags |= RTF_ANYCAST;
2101         else
2102                 rt->rt6i_flags |= RTF_LOCAL;
2103         err = rt6_bind_neighbour(rt, rt->dst.dev);
2104         if (err) {
2105                 dst_free(&rt->dst);
2106                 return ERR_PTR(err);
2107         }
2108
2109         rt->rt6i_dst.addr = *addr;
2110         rt->rt6i_dst.plen = 128;
2111         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2112
2113         atomic_set(&rt->dst.__refcnt, 1);
2114
2115         return rt;
2116 }
2117
2118 int ip6_route_get_saddr(struct net *net,
2119                         struct rt6_info *rt,
2120                         const struct in6_addr *daddr,
2121                         unsigned int prefs,
2122                         struct in6_addr *saddr)
2123 {
2124         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2125         int err = 0;
2126         if (rt->rt6i_prefsrc.plen)
2127                 *saddr = rt->rt6i_prefsrc.addr;
2128         else
2129                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2130                                          daddr, prefs, saddr);
2131         return err;
2132 }
2133
2134 /* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* only routes on this device; NULL matches any */
	struct net		*net;	/* namespace whose null entry is skipped */
	struct in6_addr		*addr;	/* preferred source address to remove */
};
2140
2141 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2142 {
2143         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2144         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2145         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2146
2147         if (((void *)rt->dst.dev == dev || !dev) &&
2148             rt != net->ipv6.ip6_null_entry &&
2149             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2150                 /* remove prefsrc entry */
2151                 rt->rt6i_prefsrc.plen = 0;
2152         }
2153         return 0;
2154 }
2155
2156 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2157 {
2158         struct net *net = dev_net(ifp->idev->dev);
2159         struct arg_dev_net_ip adni = {
2160                 .dev = ifp->idev->dev,
2161                 .net = net,
2162                 .addr = &ifp->addr,
2163         };
2164         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2165 }
2166
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace whose null entry is skipped */
};
2171
2172 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2173 {
2174         const struct arg_dev_net *adn = arg;
2175         const struct net_device *dev = adn->dev;
2176
2177         if ((rt->dst.dev == dev || !dev) &&
2178             rt != adn->net->ipv6.ip6_null_entry)
2179                 return -1;
2180
2181         return 0;
2182 }
2183
2184 void rt6_ifdown(struct net *net, struct net_device *dev)
2185 {
2186         struct arg_dev_net adn = {
2187                 .dev = dev,
2188                 .net = net,
2189         };
2190
2191         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2192         icmp6_clean_all(fib6_ifdown, &adn);
2193 }
2194
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
2199
2200 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2201 {
2202         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2203         struct inet6_dev *idev;
2204
2205         /* In IPv6 pmtu discovery is not optional,
2206            so that RTAX_MTU lock cannot disable it.
2207            We still use this lock to block changes
2208            caused by addrconf/ndisc.
2209         */
2210
2211         idev = __in6_dev_get(arg->dev);
2212         if (!idev)
2213                 return 0;
2214
2215         /* For administrative MTU increase, there is no way to discover
2216            IPv6 PMTU increase, so PMTU increase should be updated here.
2217            Since RFC 1981 doesn't include administrative MTU increase
2218            update PMTU increase is a MUST. (i.e. jumbo frame)
2219          */
2220         /*
2221            If new MTU is less than route PMTU, this new MTU will be the
2222            lowest MTU in the path, update the route PMTU to reflect PMTU
2223            decreases; if new MTU is greater than route PMTU, and the
2224            old MTU is the lowest MTU in the path, update the route PMTU
2225            to reflect the increase. In this case if the other nodes' MTU
2226            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2227            PMTU discouvery.
2228          */
2229         if (rt->dst.dev == arg->dev &&
2230             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2231             (dst_mtu(&rt->dst) >= arg->mtu ||
2232              (dst_mtu(&rt->dst) < arg->mtu &&
2233               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2234                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2235         }
2236         return 0;
2237 }
2238
2239 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2240 {
2241         struct rt6_mtu_change_arg arg = {
2242                 .dev = dev,
2243                 .mtu = mtu,
2244         };
2245
2246         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2247 }
2248
2249 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2250         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2251         [RTA_OIF]               = { .type = NLA_U32 },
2252         [RTA_IIF]               = { .type = NLA_U32 },
2253         [RTA_PRIORITY]          = { .type = NLA_U32 },
2254         [RTA_METRICS]           = { .type = NLA_NESTED },
2255 };
2256
2257 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2258                               struct fib6_config *cfg)
2259 {
2260         struct rtmsg *rtm;
2261         struct nlattr *tb[RTA_MAX+1];
2262         int err;
2263
2264         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2265         if (err < 0)
2266                 goto errout;
2267
2268         err = -EINVAL;
2269         rtm = nlmsg_data(nlh);
2270         memset(cfg, 0, sizeof(*cfg));
2271
2272         cfg->fc_table = rtm->rtm_table;
2273         cfg->fc_dst_len = rtm->rtm_dst_len;
2274         cfg->fc_src_len = rtm->rtm_src_len;
2275         cfg->fc_flags = RTF_UP;
2276         cfg->fc_protocol = rtm->rtm_protocol;
2277         cfg->fc_type = rtm->rtm_type;
2278
2279         if (rtm->rtm_type == RTN_UNREACHABLE ||
2280             rtm->rtm_type == RTN_BLACKHOLE ||
2281             rtm->rtm_type == RTN_PROHIBIT ||
2282             rtm->rtm_type == RTN_THROW)
2283                 cfg->fc_flags |= RTF_REJECT;
2284
2285         if (rtm->rtm_type == RTN_LOCAL)
2286                 cfg->fc_flags |= RTF_LOCAL;
2287
2288         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2289         cfg->fc_nlinfo.nlh = nlh;
2290         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2291
2292         if (tb[RTA_GATEWAY]) {
2293                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2294                 cfg->fc_flags |= RTF_GATEWAY;
2295         }
2296
2297         if (tb[RTA_DST]) {
2298                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2299
2300                 if (nla_len(tb[RTA_DST]) < plen)
2301                         goto errout;
2302
2303                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2304         }
2305
2306         if (tb[RTA_SRC]) {
2307                 int plen = (rtm->rtm_src_len + 7) >> 3;
2308
2309                 if (nla_len(tb[RTA_SRC]) < plen)
2310                         goto errout;
2311
2312                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2313         }
2314
2315         if (tb[RTA_PREFSRC])
2316                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2317
2318         if (tb[RTA_OIF])
2319                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2320
2321         if (tb[RTA_PRIORITY])
2322                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2323
2324         if (tb[RTA_METRICS]) {
2325                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2326                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2327         }
2328
2329         if (tb[RTA_TABLE])
2330                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2331
2332         err = 0;
2333 errout:
2334         return err;
2335 }
2336
2337 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2338 {
2339         struct fib6_config cfg;
2340         int err;
2341
2342         err = rtm_to_fib6_config(skb, nlh, &cfg);
2343         if (err < 0)
2344                 return err;
2345
2346         return ip6_route_del(&cfg);
2347 }
2348
2349 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2350 {
2351         struct fib6_config cfg;
2352         int err;
2353
2354         err = rtm_to_fib6_config(skb, nlh, &cfg);
2355         if (err < 0)
2356                 return err;
2357
2358         return ip6_route_add(&cfg);
2359 }
2360
2361 static inline size_t rt6_nlmsg_size(void)
2362 {
2363         return NLMSG_ALIGN(sizeof(struct rtmsg))
2364                + nla_total_size(16) /* RTA_SRC */
2365                + nla_total_size(16) /* RTA_DST */
2366                + nla_total_size(16) /* RTA_GATEWAY */
2367                + nla_total_size(16) /* RTA_PREFSRC */
2368                + nla_total_size(4) /* RTA_TABLE */
2369                + nla_total_size(4) /* RTA_IIF */
2370                + nla_total_size(4) /* RTA_OIF */
2371                + nla_total_size(4) /* RTA_PRIORITY */
2372                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2373                + nla_total_size(sizeof(struct rta_cacheinfo));
2374 }
2375
2376 static int rt6_fill_node(struct net *net,
2377                          struct sk_buff *skb, struct rt6_info *rt,
2378                          struct in6_addr *dst, struct in6_addr *src,
2379                          int iif, int type, u32 portid, u32 seq,
2380                          int prefix, int nowait, unsigned int flags)
2381 {
2382         struct rtmsg *rtm;
2383         struct nlmsghdr *nlh;
2384         long expires;
2385         u32 table;
2386         struct neighbour *n;
2387
2388         if (prefix) {   /* user wants prefix routes only */
2389                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2390                         /* success since this is not a prefix route */
2391                         return 1;
2392                 }
2393         }
2394
2395         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2396         if (!nlh)
2397                 return -EMSGSIZE;
2398
2399         rtm = nlmsg_data(nlh);
2400         rtm->rtm_family = AF_INET6;
2401         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2402         rtm->rtm_src_len = rt->rt6i_src.plen;
2403         rtm->rtm_tos = 0;
2404         if (rt->rt6i_table)
2405                 table = rt->rt6i_table->tb6_id;
2406         else
2407                 table = RT6_TABLE_UNSPEC;
2408         rtm->rtm_table = table;
2409         if (nla_put_u32(skb, RTA_TABLE, table))
2410                 goto nla_put_failure;
2411         if (rt->rt6i_flags & RTF_REJECT) {
2412                 switch (rt->dst.error) {
2413                 case -EINVAL:
2414                         rtm->rtm_type = RTN_BLACKHOLE;
2415                         break;
2416                 case -EACCES:
2417                         rtm->rtm_type = RTN_PROHIBIT;
2418                         break;
2419                 case -EAGAIN:
2420                         rtm->rtm_type = RTN_THROW;
2421                         break;
2422                 default:
2423                         rtm->rtm_type = RTN_UNREACHABLE;
2424                         break;
2425                 }
2426         }
2427         else if (rt->rt6i_flags & RTF_LOCAL)
2428                 rtm->rtm_type = RTN_LOCAL;
2429         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2430                 rtm->rtm_type = RTN_LOCAL;
2431         else
2432                 rtm->rtm_type = RTN_UNICAST;
2433         rtm->rtm_flags = 0;
2434         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2435         rtm->rtm_protocol = rt->rt6i_protocol;
2436         if (rt->rt6i_flags & RTF_DYNAMIC)
2437                 rtm->rtm_protocol = RTPROT_REDIRECT;
2438         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2439                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2440                         rtm->rtm_protocol = RTPROT_RA;
2441                 else
2442                         rtm->rtm_protocol = RTPROT_KERNEL;
2443         }
2444
2445         if (rt->rt6i_flags & RTF_CACHE)
2446                 rtm->rtm_flags |= RTM_F_CLONED;
2447
2448         if (dst) {
2449                 if (nla_put(skb, RTA_DST, 16, dst))
2450                         goto nla_put_failure;
2451                 rtm->rtm_dst_len = 128;
2452         } else if (rtm->rtm_dst_len)
2453                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2454                         goto nla_put_failure;
2455 #ifdef CONFIG_IPV6_SUBTREES
2456         if (src) {
2457                 if (nla_put(skb, RTA_SRC, 16, src))
2458                         goto nla_put_failure;
2459                 rtm->rtm_src_len = 128;
2460         } else if (rtm->rtm_src_len &&
2461                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2462                 goto nla_put_failure;
2463 #endif
2464         if (iif) {
2465 #ifdef CONFIG_IPV6_MROUTE
2466                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2467                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2468                         if (err <= 0) {
2469                                 if (!nowait) {
2470                                         if (err == 0)
2471                                                 return 0;
2472                                         goto nla_put_failure;
2473                                 } else {
2474                                         if (err == -EMSGSIZE)
2475                                                 goto nla_put_failure;
2476                                 }
2477                         }
2478                 } else
2479 #endif
2480                         if (nla_put_u32(skb, RTA_IIF, iif))
2481                                 goto nla_put_failure;
2482         } else if (dst) {
2483                 struct in6_addr saddr_buf;
2484                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2485                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2486                         goto nla_put_failure;
2487         }
2488
2489         if (rt->rt6i_prefsrc.plen) {
2490                 struct in6_addr saddr_buf;
2491                 saddr_buf = rt->rt6i_prefsrc.addr;
2492                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2493                         goto nla_put_failure;
2494         }
2495
2496         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2497                 goto nla_put_failure;
2498
2499         rcu_read_lock();
2500         n = rt->n;
2501         if (n) {
2502                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2503                         rcu_read_unlock();
2504                         goto nla_put_failure;
2505                 }
2506         }
2507         rcu_read_unlock();
2508
2509         if (rt->dst.dev &&
2510             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2511                 goto nla_put_failure;
2512         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2513                 goto nla_put_failure;
2514
2515         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2516
2517         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2518                 goto nla_put_failure;
2519
2520         return nlmsg_end(skb, nlh);
2521
2522 nla_put_failure:
2523         nlmsg_cancel(skb, nlh);
2524         return -EMSGSIZE;
2525 }
2526
2527 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2528 {
2529         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2530         int prefix;
2531
2532         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2533                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2534                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2535         } else
2536                 prefix = 0;
2537
2538         return rt6_fill_node(arg->net,
2539                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2540                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2541                      prefix, 0, NLM_F_MULTI);
2542 }
2543
2544 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2545 {
2546         struct net *net = sock_net(in_skb->sk);
2547         struct nlattr *tb[RTA_MAX+1];
2548         struct rt6_info *rt;
2549         struct sk_buff *skb;
2550         struct rtmsg *rtm;
2551         struct flowi6 fl6;
2552         int err, iif = 0, oif = 0;
2553
2554         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2555         if (err < 0)
2556                 goto errout;
2557
2558         err = -EINVAL;
2559         memset(&fl6, 0, sizeof(fl6));
2560
2561         if (tb[RTA_SRC]) {
2562                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2563                         goto errout;
2564
2565                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2566         }
2567
2568         if (tb[RTA_DST]) {
2569                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2570                         goto errout;
2571
2572                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2573         }
2574
2575         if (tb[RTA_IIF])
2576                 iif = nla_get_u32(tb[RTA_IIF]);
2577
2578         if (tb[RTA_OIF])
2579                 oif = nla_get_u32(tb[RTA_OIF]);
2580
2581         if (iif) {
2582                 struct net_device *dev;
2583                 int flags = 0;
2584
2585                 dev = __dev_get_by_index(net, iif);
2586                 if (!dev) {
2587                         err = -ENODEV;
2588                         goto errout;
2589                 }
2590
2591                 fl6.flowi6_iif = iif;
2592
2593                 if (!ipv6_addr_any(&fl6.saddr))
2594                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2595
2596                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2597                                                                flags);
2598         } else {
2599                 fl6.flowi6_oif = oif;
2600
2601                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2602         }
2603
2604         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2605         if (!skb) {
2606                 dst_release(&rt->dst);
2607                 err = -ENOBUFS;
2608                 goto errout;
2609         }
2610
2611         /* Reserve room for dummy headers, this skb can pass
2612            through good chunk of routing engine.
2613          */
2614         skb_reset_mac_header(skb);
2615         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2616
2617         skb_dst_set(skb, &rt->dst);
2618
2619         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2620                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2621                             nlh->nlmsg_seq, 0, 0, 0);
2622         if (err < 0) {
2623                 kfree_skb(skb);
2624                 goto errout;
2625         }
2626
2627         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2628 errout:
2629         return err;
2630 }
2631
2632 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2633 {
2634         struct sk_buff *skb;
2635         struct net *net = info->nl_net;
2636         u32 seq;
2637         int err;
2638
2639         err = -ENOBUFS;
2640         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2641
2642         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2643         if (!skb)
2644                 goto errout;
2645
2646         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2647                                 event, info->portid, seq, 0, 0, 0);
2648         if (err < 0) {
2649                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2650                 WARN_ON(err == -EMSGSIZE);
2651                 kfree_skb(skb);
2652                 goto errout;
2653         }
2654         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2655                     info->nlh, gfp_any());
2656         return;
2657 errout:
2658         if (err < 0)
2659                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2660 }
2661
2662 static int ip6_route_dev_notify(struct notifier_block *this,
2663                                 unsigned long event, void *data)
2664 {
2665         struct net_device *dev = (struct net_device *)data;
2666         struct net *net = dev_net(dev);
2667
2668         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2669                 net->ipv6.ip6_null_entry->dst.dev = dev;
2670                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2671 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2672                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2673                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2674                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2675                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2676 #endif
2677         }
2678
2679         return NOTIFY_OK;
2680 }
2681
2682 /*
2683  *      /proc
2684  */
2685
2686 #ifdef CONFIG_PROC_FS
2687
/*
 * NOTE(review): no user of this structure is visible in this part of the
 * file; the fields look like bookkeeping for a buffer-based /proc dump,
 * but confirm before relying on that (or drop the type if it is unused).
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2696
2697 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2698 {
2699         struct seq_file *m = p_arg;
2700         struct neighbour *n;
2701
2702         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2703
2704 #ifdef CONFIG_IPV6_SUBTREES
2705         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2706 #else
2707         seq_puts(m, "00000000000000000000000000000000 00 ");
2708 #endif
2709         rcu_read_lock();
2710         n = rt->n;
2711         if (n) {
2712                 seq_printf(m, "%pi6", n->primary_key);
2713         } else {
2714                 seq_puts(m, "00000000000000000000000000000000");
2715         }
2716         rcu_read_unlock();
2717         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2718                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2719                    rt->dst.__use, rt->rt6i_flags,
2720                    rt->dst.dev ? rt->dst.dev->name : "");
2721         return 0;
2722 }
2723
2724 static int ipv6_route_show(struct seq_file *m, void *v)
2725 {
2726         struct net *net = (struct net *)m->private;
2727         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2728         return 0;
2729 }
2730
2731 static int ipv6_route_open(struct inode *inode, struct file *file)
2732 {
2733         return single_open_net(inode, file, ipv6_route_show);
2734 }
2735
2736 static const struct file_operations ipv6_route_proc_fops = {
2737         .owner          = THIS_MODULE,
2738         .open           = ipv6_route_open,
2739         .read           = seq_read,
2740         .llseek         = seq_lseek,
2741         .release        = single_release_net,
2742 };
2743
2744 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2745 {
2746         struct net *net = (struct net *)seq->private;
2747         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2748                    net->ipv6.rt6_stats->fib_nodes,
2749                    net->ipv6.rt6_stats->fib_route_nodes,
2750                    net->ipv6.rt6_stats->fib_rt_alloc,
2751                    net->ipv6.rt6_stats->fib_rt_entries,
2752                    net->ipv6.rt6_stats->fib_rt_cache,
2753                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2754                    net->ipv6.rt6_stats->fib_discarded_routes);
2755
2756         return 0;
2757 }
2758
2759 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2760 {
2761         return single_open_net(inode, file, rt6_stats_seq_show);
2762 }
2763
2764 static const struct file_operations rt6_stats_seq_fops = {
2765         .owner   = THIS_MODULE,
2766         .open    = rt6_stats_seq_open,
2767         .read    = seq_read,
2768         .llseek  = seq_lseek,
2769         .release = single_release_net,
2770 };
2771 #endif  /* CONFIG_PROC_FS */
2772
2773 #ifdef CONFIG_SYSCTL
2774
2775 static
2776 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2777                               void __user *buffer, size_t *lenp, loff_t *ppos)
2778 {
2779         struct net *net;
2780         int delay;
2781         if (!write)
2782                 return -EINVAL;
2783
2784         net = (struct net *)ctl->extra1;
2785         delay = net->ipv6.sysctl.flush_delay;
2786         proc_dointvec(ctl, write, buffer, lenp, ppos);
2787         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2788         return 0;
2789 }
2790
2791 ctl_table ipv6_route_table_template[] = {
2792         {
2793                 .procname       =       "flush",
2794                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2795                 .maxlen         =       sizeof(int),
2796                 .mode           =       0200,
2797                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2798         },
2799         {
2800                 .procname       =       "gc_thresh",
2801                 .data           =       &ip6_dst_ops_template.gc_thresh,
2802                 .maxlen         =       sizeof(int),
2803                 .mode           =       0644,
2804                 .proc_handler   =       proc_dointvec,
2805         },
2806         {
2807                 .procname       =       "max_size",
2808                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2809                 .maxlen         =       sizeof(int),
2810                 .mode           =       0644,
2811                 .proc_handler   =       proc_dointvec,
2812         },
2813         {
2814                 .procname       =       "gc_min_interval",
2815                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2816                 .maxlen         =       sizeof(int),
2817                 .mode           =       0644,
2818                 .proc_handler   =       proc_dointvec_jiffies,
2819         },
2820         {
2821                 .procname       =       "gc_timeout",
2822                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2823                 .maxlen         =       sizeof(int),
2824                 .mode           =       0644,
2825                 .proc_handler   =       proc_dointvec_jiffies,
2826         },
2827         {
2828                 .procname       =       "gc_interval",
2829                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2830                 .maxlen         =       sizeof(int),
2831                 .mode           =       0644,
2832                 .proc_handler   =       proc_dointvec_jiffies,
2833         },
2834         {
2835                 .procname       =       "gc_elasticity",
2836                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2837                 .maxlen         =       sizeof(int),
2838                 .mode           =       0644,
2839                 .proc_handler   =       proc_dointvec,
2840         },
2841         {
2842                 .procname       =       "mtu_expires",
2843                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2844                 .maxlen         =       sizeof(int),
2845                 .mode           =       0644,
2846                 .proc_handler   =       proc_dointvec_jiffies,
2847         },
2848         {
2849                 .procname       =       "min_adv_mss",
2850                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2851                 .maxlen         =       sizeof(int),
2852                 .mode           =       0644,
2853                 .proc_handler   =       proc_dointvec,
2854         },
2855         {
2856                 .procname       =       "gc_min_interval_ms",
2857                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2858                 .maxlen         =       sizeof(int),
2859                 .mode           =       0644,
2860                 .proc_handler   =       proc_dointvec_ms_jiffies,
2861         },
2862         { }
2863 };
2864
2865 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2866 {
2867         struct ctl_table *table;
2868
2869         table = kmemdup(ipv6_route_table_template,
2870                         sizeof(ipv6_route_table_template),
2871                         GFP_KERNEL);
2872
2873         if (table) {
2874                 table[0].data = &net->ipv6.sysctl.flush_delay;
2875                 table[0].extra1 = net;
2876                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2877                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2878                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2879                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2880                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2881                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2882                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2883                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2884                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2885         }
2886
2887         return table;
2888 }
2889 #endif
2890
2891 static int __net_init ip6_route_net_init(struct net *net)
2892 {
2893         int ret = -ENOMEM;
2894
2895         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2896                sizeof(net->ipv6.ip6_dst_ops));
2897
2898         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2899                 goto out_ip6_dst_ops;
2900
2901         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2902                                            sizeof(*net->ipv6.ip6_null_entry),
2903                                            GFP_KERNEL);
2904         if (!net->ipv6.ip6_null_entry)
2905                 goto out_ip6_dst_entries;
2906         net->ipv6.ip6_null_entry->dst.path =
2907                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2908         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2909         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2910                          ip6_template_metrics, true);
2911
2912 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2913         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2914                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2915                                                GFP_KERNEL);
2916         if (!net->ipv6.ip6_prohibit_entry)
2917                 goto out_ip6_null_entry;
2918         net->ipv6.ip6_prohibit_entry->dst.path =
2919                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2920         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2921         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2922                          ip6_template_metrics, true);
2923
2924         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2925                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2926                                                GFP_KERNEL);
2927         if (!net->ipv6.ip6_blk_hole_entry)
2928                 goto out_ip6_prohibit_entry;
2929         net->ipv6.ip6_blk_hole_entry->dst.path =
2930                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2931         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2932         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2933                          ip6_template_metrics, true);
2934 #endif
2935
2936         net->ipv6.sysctl.flush_delay = 0;
2937         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2938         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2939         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2940         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2941         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2942         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2943         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2944
2945         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2946
2947         ret = 0;
2948 out:
2949         return ret;
2950
2951 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2952 out_ip6_prohibit_entry:
2953         kfree(net->ipv6.ip6_prohibit_entry);
2954 out_ip6_null_entry:
2955         kfree(net->ipv6.ip6_null_entry);
2956 #endif
2957 out_ip6_dst_entries:
2958         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2959 out_ip6_dst_ops:
2960         goto out;
2961 }
2962
2963 static void __net_exit ip6_route_net_exit(struct net *net)
2964 {
2965         kfree(net->ipv6.ip6_null_entry);
2966 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2967         kfree(net->ipv6.ip6_prohibit_entry);
2968         kfree(net->ipv6.ip6_blk_hole_entry);
2969 #endif
2970         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2971 }
2972
2973 static int __net_init ip6_route_net_init_late(struct net *net)
2974 {
2975 #ifdef CONFIG_PROC_FS
2976         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2977         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2978 #endif
2979         return 0;
2980 }
2981
2982 static void __net_exit ip6_route_net_exit_late(struct net *net)
2983 {
2984 #ifdef CONFIG_PROC_FS
2985         proc_net_remove(net, "ipv6_route");
2986         proc_net_remove(net, "rt6_stats");
2987 #endif
2988 }
2989
2990 static struct pernet_operations ip6_route_net_ops = {
2991         .init = ip6_route_net_init,
2992         .exit = ip6_route_net_exit,
2993 };
2994
2995 static int __net_init ipv6_inetpeer_init(struct net *net)
2996 {
2997         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2998
2999         if (!bp)
3000                 return -ENOMEM;
3001         inet_peer_base_init(bp);
3002         net->ipv6.peers = bp;
3003         return 0;
3004 }
3005
3006 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3007 {
3008         struct inet_peer_base *bp = net->ipv6.peers;
3009
3010         net->ipv6.peers = NULL;
3011         inetpeer_invalidate_tree(bp);
3012         kfree(bp);
3013 }
3014
3015 static struct pernet_operations ipv6_inetpeer_ops = {
3016         .init   =       ipv6_inetpeer_init,
3017         .exit   =       ipv6_inetpeer_exit,
3018 };
3019
3020 static struct pernet_operations ip6_route_net_late_ops = {
3021         .init = ip6_route_net_init_late,
3022         .exit = ip6_route_net_exit_late,
3023 };
3024
3025 static struct notifier_block ip6_route_dev_notifier = {
3026         .notifier_call = ip6_route_dev_notify,
3027         .priority = 0,
3028 };
3029
3030 int __init ip6_route_init(void)
3031 {
3032         int ret;
3033
3034         ret = -ENOMEM;
3035         ip6_dst_ops_template.kmem_cachep =
3036                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3037                                   SLAB_HWCACHE_ALIGN, NULL);
3038         if (!ip6_dst_ops_template.kmem_cachep)
3039                 goto out;
3040
3041         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3042         if (ret)
3043                 goto out_kmem_cache;
3044
3045         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3046         if (ret)
3047                 goto out_dst_entries;
3048
3049         ret = register_pernet_subsys(&ip6_route_net_ops);
3050         if (ret)
3051                 goto out_register_inetpeer;
3052
3053         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3054
3055         /* Registering of the loopback is done before this portion of code,
3056          * the loopback reference in rt6_info will not be taken, do it
3057          * manually for init_net */
3058         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3059         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3060   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3061         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3062         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3063         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3064         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3065   #endif
3066         ret = fib6_init();
3067         if (ret)
3068                 goto out_register_subsys;
3069
3070         ret = xfrm6_init();
3071         if (ret)
3072                 goto out_fib6_init;
3073
3074         ret = fib6_rules_init();
3075         if (ret)
3076                 goto xfrm6_init;
3077
3078         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3079         if (ret)
3080                 goto fib6_rules_init;
3081
3082         ret = -ENOBUFS;
3083         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3084             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3085             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3086                 goto out_register_late_subsys;
3087
3088         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3089         if (ret)
3090                 goto out_register_late_subsys;
3091
3092 out:
3093         return ret;
3094
3095 out_register_late_subsys:
3096         unregister_pernet_subsys(&ip6_route_net_late_ops);
3097 fib6_rules_init:
3098         fib6_rules_cleanup();
3099 xfrm6_init:
3100         xfrm6_fini();
3101 out_fib6_init:
3102         fib6_gc_cleanup();
3103 out_register_subsys:
3104         unregister_pernet_subsys(&ip6_route_net_ops);
3105 out_register_inetpeer:
3106         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3107 out_dst_entries:
3108         dst_entries_destroy(&ip6_dst_blackhole_ops);
3109 out_kmem_cache:
3110         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3111         goto out;
3112 }
3113
3114 void ip6_route_cleanup(void)
3115 {
3116         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3117         unregister_pernet_subsys(&ip6_route_net_late_ops);
3118         fib6_rules_cleanup();
3119         xfrm6_fini();
3120         fib6_gc_cleanup();
3121         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3122         unregister_pernet_subsys(&ip6_route_net_ops);
3123         dst_entries_destroy(&ip6_dst_blackhole_ops);
3124         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3125 }