inet: Hide route peer accesses behind helpers.
[linux-3.10.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
/*
 * Forward declarations of file-local helpers and dst_ops callbacks that are
 * referenced earlier in the file than they are defined (for example from
 * the dst_ops tables and the null-entry template below).
 */
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68                                     const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int      ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void             ip6_dst_destroy(struct dst_entry *);
74 static void             ip6_dst_ifdown(struct dst_entry *,
75                                        struct net_device *dev, int how);
76 static int               ip6_dst_gc(struct dst_ops *ops);
77
78 static int              ip6_pkt_discard(struct sk_buff *skb);
79 static int              ip6_pkt_discard_out(struct sk_buff *skb);
80 static void             ip6_link_failure(struct sk_buff *skb);
81 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
/* Route-information helpers used by rt6_route_rcv(); same config guard. */
83 #ifdef CONFIG_IPV6_ROUTE_INFO
84 static struct rt6_info *rt6_add_route_info(struct net *net,
85                                            const struct in6_addr *prefix, int prefixlen,
86                                            const struct in6_addr *gwaddr, int ifindex,
87                                            unsigned int pref);
88 static struct rt6_info *rt6_get_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex);
91 #endif
92
93 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94 {
95         struct rt6_info *rt = (struct rt6_info *) dst;
96         struct inet_peer *peer;
97         u32 *p = NULL;
98
99         if (!(rt->dst.flags & DST_HOST))
100                 return NULL;
101
102         peer = rt6_get_peer_create(rt);
103         if (peer) {
104                 u32 *old_p = __DST_METRICS_PTR(old);
105                 unsigned long prev, new;
106
107                 p = peer->metrics;
108                 if (inet_metrics_new(peer))
109                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111                 new = (unsigned long) p;
112                 prev = cmpxchg(&dst->_metrics, old, new);
113
114                 if (prev != old) {
115                         p = __DST_METRICS_PTR(prev);
116                         if (prev & DST_METRICS_READ_ONLY)
117                                 p = NULL;
118                 }
119         }
120         return p;
121 }
122
123 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
124 {
125         struct in6_addr *p = &rt->rt6i_gateway;
126
127         if (!ipv6_addr_any(p))
128                 return (const void *) p;
129         return daddr;
130 }
131
132 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
133 {
134         struct rt6_info *rt = (struct rt6_info *) dst;
135         struct neighbour *n;
136
137         daddr = choose_neigh_daddr(rt, daddr);
138         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
139         if (n)
140                 return n;
141         return neigh_create(&nd_tbl, daddr, dst->dev);
142 }
143
144 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
145 {
146         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
147         if (!n) {
148                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
149                 if (IS_ERR(n))
150                         return PTR_ERR(n);
151         }
152         dst_set_neighbour(&rt->dst, n);
153
154         return 0;
155 }
156
157 static struct dst_ops ip6_dst_ops_template = {
158         .family                 =       AF_INET6,
159         .protocol               =       cpu_to_be16(ETH_P_IPV6),
160         .gc                     =       ip6_dst_gc,
161         .gc_thresh              =       1024,
162         .check                  =       ip6_dst_check,
163         .default_advmss         =       ip6_default_advmss,
164         .mtu                    =       ip6_mtu,
165         .cow_metrics            =       ipv6_cow_metrics,
166         .destroy                =       ip6_dst_destroy,
167         .ifdown                 =       ip6_dst_ifdown,
168         .negative_advice        =       ip6_negative_advice,
169         .link_failure           =       ip6_link_failure,
170         .update_pmtu            =       ip6_rt_update_pmtu,
171         .local_out              =       __ip6_local_out,
172         .neigh_lookup           =       ip6_neigh_lookup,
173 };
174
175 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
176 {
177         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
178
179         return mtu ? : dst->dev->mtu;
180 }
181
182 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
183 {
184 }
185
186 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
187                                          unsigned long old)
188 {
189         return NULL;
190 }
191
192 static struct dst_ops ip6_dst_blackhole_ops = {
193         .family                 =       AF_INET6,
194         .protocol               =       cpu_to_be16(ETH_P_IPV6),
195         .destroy                =       ip6_dst_destroy,
196         .check                  =       ip6_dst_check,
197         .mtu                    =       ip6_blackhole_mtu,
198         .default_advmss         =       ip6_default_advmss,
199         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
200         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
201         .neigh_lookup           =       ip6_neigh_lookup,
202 };
203
/*
 * Metrics table in which only the hop-limit slot (index RTAX_HOPLIMIT - 1)
 * is preset, to 255; every other slot is zero-initialised.
 */
204 static const u32 ip6_template_metrics[RTAX_MAX] = {
205         [RTAX_HOPLIMIT - 1] = 255,
206 };
207
208 static struct rt6_info ip6_null_entry_template = {
209         .dst = {
210                 .__refcnt       = ATOMIC_INIT(1),
211                 .__use          = 1,
212                 .obsolete       = -1,
213                 .error          = -ENETUNREACH,
214                 .input          = ip6_pkt_discard,
215                 .output         = ip6_pkt_discard_out,
216         },
217         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
218         .rt6i_protocol  = RTPROT_KERNEL,
219         .rt6i_metric    = ~(u32) 0,
220         .rt6i_ref       = ATOMIC_INIT(1),
221 };
222
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template reject route whose dst reports -EACCES via the prohibit handlers. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.input = ip6_pkt_prohibit,
		.output = ip6_pkt_prohibit_out,
		.error = -EACCES,
		.obsolete = -1,
		.__use = 1,
		.__refcnt = ATOMIC_INIT(1),
	},
	.rt6i_ref = ATOMIC_INIT(1),
	.rt6i_metric = ~(u32) 0,
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
};

/* Template reject route whose dst reports -EINVAL and uses dst_discard. */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.input = dst_discard,
		.output = dst_discard,
		.error = -EINVAL,
		.obsolete = -1,
		.__use = 1,
		.__refcnt = ATOMIC_INIT(1),
	},
	.rt6i_ref = ATOMIC_INIT(1),
	.rt6i_metric = ~(u32) 0,
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
259
260 /* allocate dst with ip6_dst_ops */
261 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
262                                              struct net_device *dev,
263                                              int flags)
264 {
265         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
266                                         0, 0, flags);
267
268         if (rt) {
269                 memset(&rt->rt6i_table, 0,
270                        sizeof(*rt) - sizeof(struct dst_entry));
271                 rt6_init_peer(rt, net->ipv6.peers);
272         }
273         return rt;
274 }
275
276 static void ip6_dst_destroy(struct dst_entry *dst)
277 {
278         struct rt6_info *rt = (struct rt6_info *)dst;
279         struct inet6_dev *idev = rt->rt6i_idev;
280
281         if (!(rt->dst.flags & DST_HOST))
282                 dst_destroy_metrics_generic(dst);
283
284         if (idev) {
285                 rt->rt6i_idev = NULL;
286                 in6_dev_put(idev);
287         }
288
289         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
290                 dst_release(dst->from);
291
292         if (rt6_has_peer(rt)) {
293                 struct inet_peer *peer = rt6_peer_ptr(rt);
294                 inet_putpeer(peer);
295         }
296 }
297
298 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
299
300 static u32 rt6_peer_genid(void)
301 {
302         return atomic_read(&__rt6_peer_genid);
303 }
304
305 void rt6_bind_peer(struct rt6_info *rt, int create)
306 {
307         struct inet_peer_base *base;
308         struct inet_peer *peer;
309
310         base = inetpeer_base_ptr(rt->_rt6i_peer);
311         if (!base)
312                 return;
313
314         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
315         if (!rt6_set_peer(rt, peer))
316                 inet_putpeer(peer);
317         else
318                 rt->rt6i_peer_genid = rt6_peer_genid();
319 }
320
321 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
322                            int how)
323 {
324         struct rt6_info *rt = (struct rt6_info *)dst;
325         struct inet6_dev *idev = rt->rt6i_idev;
326         struct net_device *loopback_dev =
327                 dev_net(dev)->loopback_dev;
328
329         if (dev != loopback_dev && idev && idev->dev == dev) {
330                 struct inet6_dev *loopback_idev =
331                         in6_dev_get(loopback_dev);
332                 if (loopback_idev) {
333                         rt->rt6i_idev = loopback_idev;
334                         in6_dev_put(idev);
335                 }
336         }
337 }
338
339 static bool rt6_check_expired(const struct rt6_info *rt)
340 {
341         struct rt6_info *ort = NULL;
342
343         if (rt->rt6i_flags & RTF_EXPIRES) {
344                 if (time_after(jiffies, rt->dst.expires))
345                         return true;
346         } else if (rt->dst.from) {
347                 ort = (struct rt6_info *) rt->dst.from;
348                 return (ort->rt6i_flags & RTF_EXPIRES) &&
349                         time_after(jiffies, ort->dst.expires);
350         }
351         return false;
352 }
353
354 static bool rt6_need_strict(const struct in6_addr *daddr)
355 {
356         return ipv6_addr_type(daddr) &
357                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
358 }
359
360 /*
361  *      Route lookup. Any table->tb6_lock is implied.
362  */
363
/*
 * Pick, from the rt6_next chain starting at @rt, the route that matches the
 * requested output interface @oif or, when @oif is 0, the source address
 * @saddr.
 *
 * - No @oif and an unspecified @saddr: @rt is returned unchanged.
 * - With @oif: the first route whose device has that ifindex wins.  Routes
 *   on a loopback device are remembered in 'local' as a fallback, subject
 *   to the checks in the loop.
 * - Without @oif: the first route for which ipv6_chk_addr() accepts @saddr
 *   on the route's device wins.
 *
 * If the walk finds nothing and @oif is set, the remembered 'local' route
 * is returned; failing that, the namespace's null entry when
 * RT6_LOOKUP_F_IFACE is set.  In every other case @rt itself is returned.
 */
364 static inline struct rt6_info *rt6_device_match(struct net *net,
365                                                     struct rt6_info *rt,
366                                                     const struct in6_addr *saddr,
367                                                     int oif,
368                                                     int flags)
369 {
370         struct rt6_info *local = NULL;
371         struct rt6_info *sprt;
372
373         if (!oif && ipv6_addr_any(saddr))
374                 goto out;
375
376         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
377                 struct net_device *dev = sprt->dst.dev;
378
379                 if (oif) {
                        /* Exact interface match: done. */
380                         if (dev->ifindex == oif)
381                                 return sprt;
382                         if (dev->flags & IFF_LOOPBACK) {
                                /*
                                 * Loopback route whose inet6_dev is missing or
                                 * belongs to another interface: skip it under
                                 * strict matching, or when an already remembered
                                 * 'local' is tied to oif.
                                 */
383                                 if (!sprt->rt6i_idev ||
384                                     sprt->rt6i_idev->dev->ifindex != oif) {
385                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
386                                                 continue;
387                                         if (local && (!oif ||
388                                                       local->rt6i_idev->dev->ifindex == oif))
389                                                 continue;
390                                 }
391                                 local = sprt;
392                         }
393                 } else {
                        /* No oif: match on the source address instead. */
394                         if (ipv6_chk_addr(net, saddr, dev,
395                                           flags & RT6_LOOKUP_F_IFACE))
396                                 return sprt;
397                 }
398         }
399
400         if (oif) {
401                 if (local)
402                         return local;
403
404                 if (flags & RT6_LOOKUP_F_IFACE)
405                         return net->ipv6.ip6_null_entry;
406         }
407 out:
408         return rt;
409 }
410
/*
 * rt6_probe - send a neighbour solicitation towards a route's neighbour
 * when that neighbour is not in a NUD_VALID state and the last update is
 * older than the interface's rtr_probe_interval.
 *
 * @rt may be NULL, in which case nothing is done.  Without
 * CONFIG_IPV6_ROUTER_PREF the function is an empty stub.
 */
411 #ifdef CONFIG_IPV6_ROUTER_PREF
412 static void rt6_probe(struct rt6_info *rt)
413 {
414         struct neighbour *neigh;
415         /*
416          * Okay, this does not seem to be appropriate
417          * for now, however, we need to check if it
418          * is really so; aka Router Reachability Probing.
419          *
420          * Router Reachability Probe MUST be rate-limited
421          * to no more than one per minute.
422          */
423         rcu_read_lock();
424         neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
        /* Unlocked fast path: nothing to do without a neighbour or if valid. */
425         if (!neigh || (neigh->nud_state & NUD_VALID))
426                 goto out;
427         read_lock_bh(&neigh->lock);
        /* Re-check the state under the lock and apply the rate limit. */
428         if (!(neigh->nud_state & NUD_VALID) &&
429             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
430                 struct in6_addr mcaddr;
431                 struct in6_addr *target;
432
                /*
                 * NOTE(review): neigh->updated is written while only the
                 * read side of neigh->lock is held -- confirm this is
                 * intended.
                 */
433                 neigh->updated = jiffies;
434                 read_unlock_bh(&neigh->lock);
435
                /* Solicit the neighbour via its solicited-node multicast address. */
436                 target = (struct in6_addr *)&neigh->primary_key;
437                 addrconf_addr_solict_mult(target, &mcaddr);
438                 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
439         } else {
440                 read_unlock_bh(&neigh->lock);
441         }
442 out:
443         rcu_read_unlock();
444 }
445 #else
446 static inline void rt6_probe(struct rt6_info *rt)
447 {
448 }
449 #endif
450
451 /*
452  * Default Router Selection (RFC 2461 6.3.6)
453  */
454 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
455 {
456         struct net_device *dev = rt->dst.dev;
457         if (!oif || dev->ifindex == oif)
458                 return 2;
459         if ((dev->flags & IFF_LOOPBACK) &&
460             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
461                 return 1;
462         return 0;
463 }
464
465 static inline int rt6_check_neigh(struct rt6_info *rt)
466 {
467         struct neighbour *neigh;
468         int m;
469
470         rcu_read_lock();
471         neigh = dst_get_neighbour_noref(&rt->dst);
472         if (rt->rt6i_flags & RTF_NONEXTHOP ||
473             !(rt->rt6i_flags & RTF_GATEWAY))
474                 m = 1;
475         else if (neigh) {
476                 read_lock_bh(&neigh->lock);
477                 if (neigh->nud_state & NUD_VALID)
478                         m = 2;
479 #ifdef CONFIG_IPV6_ROUTER_PREF
480                 else if (neigh->nud_state & NUD_FAILED)
481                         m = 0;
482 #endif
483                 else
484                         m = 1;
485                 read_unlock_bh(&neigh->lock);
486         } else
487                 m = 0;
488         rcu_read_unlock();
489         return m;
490 }
491
492 static int rt6_score_route(struct rt6_info *rt, int oif,
493                            int strict)
494 {
495         int m, n;
496
497         m = rt6_check_dev(rt, oif);
498         if (!m && (strict & RT6_LOOKUP_F_IFACE))
499                 return -1;
500 #ifdef CONFIG_IPV6_ROUTER_PREF
501         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
502 #endif
503         n = rt6_check_neigh(rt);
504         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
505                 return -1;
506         return m;
507 }
508
509 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
510                                    int *mpri, struct rt6_info *match)
511 {
512         int m;
513
514         if (rt6_check_expired(rt))
515                 goto out;
516
517         m = rt6_score_route(rt, oif, strict);
518         if (m < 0)
519                 goto out;
520
521         if (m > *mpri) {
522                 if (strict & RT6_LOOKUP_F_REACHABLE)
523                         rt6_probe(match);
524                 *mpri = m;
525                 match = rt;
526         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
527                 rt6_probe(rt);
528         }
529
530 out:
531         return match;
532 }
533
534 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
535                                      struct rt6_info *rr_head,
536                                      u32 metric, int oif, int strict)
537 {
538         struct rt6_info *rt, *match;
539         int mpri = -1;
540
541         match = NULL;
542         for (rt = rr_head; rt && rt->rt6i_metric == metric;
543              rt = rt->dst.rt6_next)
544                 match = find_match(rt, oif, strict, &mpri, match);
545         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
546              rt = rt->dst.rt6_next)
547                 match = find_match(rt, oif, strict, &mpri, match);
548
549         return match;
550 }
551
552 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
553 {
554         struct rt6_info *match, *rt0;
555         struct net *net;
556
557         rt0 = fn->rr_ptr;
558         if (!rt0)
559                 fn->rr_ptr = rt0 = fn->leaf;
560
561         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
562
563         if (!match &&
564             (strict & RT6_LOOKUP_F_REACHABLE)) {
565                 struct rt6_info *next = rt0->dst.rt6_next;
566
567                 /* no entries matched; do round-robin */
568                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
569                         next = fn->leaf;
570
571                 if (next != rt0)
572                         fn->rr_ptr = next;
573         }
574
575         net = dev_net(rt0->dst.dev);
576         return match ? match : net->ipv6.ip6_null_entry;
577 }
578
579 #ifdef CONFIG_IPV6_ROUTE_INFO
580 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
581                   const struct in6_addr *gwaddr)
582 {
583         struct net *net = dev_net(dev);
584         struct route_info *rinfo = (struct route_info *) opt;
585         struct in6_addr prefix_buf, *prefix;
586         unsigned int pref;
587         unsigned long lifetime;
588         struct rt6_info *rt;
589
590         if (len < sizeof(struct route_info)) {
591                 return -EINVAL;
592         }
593
594         /* Sanity check for prefix_len and length */
595         if (rinfo->length > 3) {
596                 return -EINVAL;
597         } else if (rinfo->prefix_len > 128) {
598                 return -EINVAL;
599         } else if (rinfo->prefix_len > 64) {
600                 if (rinfo->length < 2) {
601                         return -EINVAL;
602                 }
603         } else if (rinfo->prefix_len > 0) {
604                 if (rinfo->length < 1) {
605                         return -EINVAL;
606                 }
607         }
608
609         pref = rinfo->route_pref;
610         if (pref == ICMPV6_ROUTER_PREF_INVALID)
611                 return -EINVAL;
612
613         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
614
615         if (rinfo->length == 3)
616                 prefix = (struct in6_addr *)rinfo->prefix;
617         else {
618                 /* this function is safe */
619                 ipv6_addr_prefix(&prefix_buf,
620                                  (struct in6_addr *)rinfo->prefix,
621                                  rinfo->prefix_len);
622                 prefix = &prefix_buf;
623         }
624
625         rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
626                                 dev->ifindex);
627
628         if (rt && !lifetime) {
629                 ip6_del_rt(rt);
630                 rt = NULL;
631         }
632
633         if (!rt && lifetime)
634                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
635                                         pref);
636         else if (rt)
637                 rt->rt6i_flags = RTF_ROUTEINFO |
638                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
639
640         if (rt) {
641                 if (!addrconf_finite_timeout(lifetime))
642                         rt6_clean_expires(rt);
643                 else
644                         rt6_set_expires(rt, jiffies + HZ * lifetime);
645
646                 dst_release(&rt->dst);
647         }
648         return 0;
649 }
650 #endif
651
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree when the current lookup
 * produced the namespace's null entry.
 *
 * The macro is not self-contained: the enclosing function must provide
 * the locals 'rt' (current result) and 'fn' (current node) and the labels
 * 'out' and 'restart'.
 *
 * While rt is the null entry: jump to 'out' if fn is flagged RTN_TL_ROOT;
 * otherwise move to the parent, or, if the parent has a subtree that is
 * not fn itself, to the result of looking up saddr in that subtree.  As
 * soon as the new fn is flagged RTN_RTINFO, jump to 'restart'.
 */
652 #define BACKTRACK(__net, saddr)                 \
653 do { \
654         if (rt == __net->ipv6.ip6_null_entry) { \
655                 struct fib6_node *pn; \
656                 while (1) { \
657                         if (fn->fn_flags & RTN_TL_ROOT) \
658                                 goto out; \
659                         pn = fn->parent; \
660                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
661                                 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
662                         else \
663                                 fn = pn; \
664                         if (fn->fn_flags & RTN_RTINFO) \
665                                 goto restart; \
666                 } \
667         } \
668 } while (0)
669
/*
 * Policy-lookup callback for plain route lookups (passed to
 * fib6_rule_lookup() by ip6_route_lookup() and rt6_lookup()).
 *
 * Under the table's read lock, finds the node for the flow's addresses,
 * picks a route from its leaf chain with rt6_device_match() and backtracks
 * through the tree while the result is the null entry.  The returned route
 * has been passed through dst_use() with the current jiffies.
 */
670 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
671                                              struct fib6_table *table,
672                                              struct flowi6 *fl6, int flags)
673 {
674         struct fib6_node *fn;
675         struct rt6_info *rt;
676
677         read_lock_bh(&table->tb6_lock);
678         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
        /* 'restart' and 'out' are jump targets used by BACKTRACK(). */
679 restart:
680         rt = fn->leaf;
681         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
682         BACKTRACK(net, &fl6->saddr);
683 out:
684         dst_use(&rt->dst, jiffies);
685         read_unlock_bh(&table->tb6_lock);
686         return rt;
687
688 }
689
690 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
691                                     int flags)
692 {
693         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
694 }
695 EXPORT_SYMBOL_GPL(ip6_route_lookup);
696
697 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
698                             const struct in6_addr *saddr, int oif, int strict)
699 {
700         struct flowi6 fl6 = {
701                 .flowi6_oif = oif,
702                 .daddr = *daddr,
703         };
704         struct dst_entry *dst;
705         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
706
707         if (saddr) {
708                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
709                 flags |= RT6_LOOKUP_F_HAS_SADDR;
710         }
711
712         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
713         if (dst->error == 0)
714                 return (struct rt6_info *) dst;
715
716         dst_release(dst);
717
718         return NULL;
719 }
720
721 EXPORT_SYMBOL(rt6_lookup);
722
723 /* ip6_ins_rt is called with FREE table->tb6_lock.
724    It takes new route entry, the addition fails by any reason the
725    route is freed. In any case, if caller does not hold it, it may
726    be destroyed.
727  */
728
729 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
730 {
731         int err;
732         struct fib6_table *table;
733
734         table = rt->rt6i_table;
735         write_lock_bh(&table->tb6_lock);
736         err = fib6_add(&table->tb6_root, rt, info);
737         write_unlock_bh(&table->tb6_lock);
738
739         return err;
740 }
741
742 int ip6_ins_rt(struct rt6_info *rt)
743 {
744         struct nl_info info = {
745                 .nl_net = dev_net(rt->dst.dev),
746         };
747         return __ip6_ins_rt(rt, &info);
748 }
749
/*
 * rt6_alloc_cow - create an RTF_CACHE copy of @ort for destination @daddr
 * and bind a neighbour entry to it.
 *
 * For non-gateway routes the copy's gateway is set to @daddr, and
 * RTF_ANYCAST is added when @ort is not a /128 route yet its destination
 * address equals @daddr.  With CONFIG_IPV6_SUBTREES a source-routed copy
 * is narrowed to the exact @saddr.
 *
 * Returns the new route, or NULL when the copy or the neighbour binding
 * fails (in the latter case the copy is freed).
 */
750 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
751                                       const struct in6_addr *daddr,
752                                       const struct in6_addr *saddr)
753 {
754         struct rt6_info *rt;
755
756         /*
757          *      Clone the route.
758          */
759
760         rt = ip6_rt_copy(ort, daddr);
761
762         if (rt) {
                /* One GC-and-retry is allowed, but only outside softirq context. */
763                 int attempts = !in_softirq();
764
765                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
766                         if (ort->rt6i_dst.plen != 128 &&
767                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
768                                 rt->rt6i_flags |= RTF_ANYCAST;
769                         rt->rt6i_gateway = *daddr;
770                 }
771
772                 rt->rt6i_flags |= RTF_CACHE;
773
774 #ifdef CONFIG_IPV6_SUBTREES
775                 if (rt->rt6i_src.plen && saddr) {
776                         rt->rt6i_src.addr = *saddr;
777                         rt->rt6i_src.plen = 128;
778                 }
779 #endif
780
781         retry:
782                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
783                         struct net *net = dev_net(rt->dst.dev);
784                         int saved_rt_min_interval =
785                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
786                         int saved_rt_elasticity =
787                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
788
789                         if (attempts-- > 0) {
                                /*
                                 * Temporarily override the GC sysctls
                                 * (elasticity 1, no minimum interval), run the
                                 * route GC, restore the saved values and retry
                                 * the neighbour binding.
                                 */
790                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
791                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
792
793                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
794
795                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
796                                         saved_rt_elasticity;
797                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
798                                         saved_rt_min_interval;
799                                 goto retry;
800                         }
801
                        /* Out of retries: give up and free the copy. */
802                         net_warn_ratelimited("Neighbour table overflow\n");
803                         dst_free(&rt->dst);
804                         return NULL;
805                 }
806         }
807
808         return rt;
809 }
810
811 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
812                                         const struct in6_addr *daddr)
813 {
814         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
815
816         if (rt) {
817                 rt->rt6i_flags |= RTF_CACHE;
818                 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
819         }
820         return rt;
821 }
822
/*
 * ip6_pol_route - common per-table lookup behind the input and output
 * policy callbacks.
 *
 * Selects a route for @fl6 on interface @oif with rt6_select().  The first
 * pass requires reachable routers (RT6_LOOKUP_F_REACHABLE) unless
 * forwarding is enabled in devconf_all; if that pass ends at 'out', the
 * lookup is repeated once without the requirement.
 *
 * A selected route that is neither the null entry nor already RTF_CACHE is
 * turned into a cache entry: copied via rt6_alloc_cow() when it has no
 * neighbour bound and RTF_NONEXTHOP is clear, via rt6_alloc_clone() when
 * it is not a DST_HOST route, or used as is otherwise.  A new entry is
 * inserted with ip6_ins_rt(); if insertion fails the whole lookup is
 * retried, up to 'attempts' times.
 *
 * The returned route carries a reference taken with dst_hold(), and its
 * lastuse and __use fields are updated.
 */
823 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
824                                       struct flowi6 *fl6, int flags)
825 {
826         struct fib6_node *fn;
827         struct rt6_info *rt, *nrt;
828         int strict = 0;
829         int attempts = 3;
830         int err;
831         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
832
833         strict |= flags & RT6_LOOKUP_F_IFACE;
834
835 relookup:
836         read_lock_bh(&table->tb6_lock);
837
838 restart_2:
839         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
840
        /* 'restart' and 'out' are also jump targets of BACKTRACK(). */
841 restart:
842         rt = rt6_select(fn, oif, strict | reachable);
843
844         BACKTRACK(net, &fl6->saddr);
845         if (rt == net->ipv6.ip6_null_entry ||
846             rt->rt6i_flags & RTF_CACHE)
847                 goto out;
848
        /* Pin the route before dropping the table lock for the copy. */
849         dst_hold(&rt->dst);
850         read_unlock_bh(&table->tb6_lock);
851
852         if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
853                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
854         else if (!(rt->dst.flags & DST_HOST))
855                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
856         else
857                 goto out2;
858
        /* Swap the reference from the original to the copy (or null entry). */
859         dst_release(&rt->dst);
860         rt = nrt ? : net->ipv6.ip6_null_entry;
861
862         dst_hold(&rt->dst);
863         if (nrt) {
864                 err = ip6_ins_rt(nrt);
865                 if (!err)
866                         goto out2;
867         }
868
869         if (--attempts <= 0)
870                 goto out2;
871
872         /*
873          * Race condition! In the gap, when table->tb6_lock was
874          * released someone could insert this route.  Relookup.
875          */
876         dst_release(&rt->dst);
877         goto relookup;
878
879 out:
        /* First pass demanded reachability: retry once without it. */
880         if (reachable) {
881                 reachable = 0;
882                 goto restart_2;
883         }
884         dst_hold(&rt->dst);
885         read_unlock_bh(&table->tb6_lock);
886 out2:
887         rt->dst.lastuse = jiffies;
888         rt->dst.__use++;
889
890         return rt;
891 }
892
893 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
894                                             struct flowi6 *fl6, int flags)
895 {
896         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
897 }
898
899 static struct dst_entry *ip6_route_input_lookup(struct net *net,
900                                                 struct net_device *dev,
901                                                 struct flowi6 *fl6, int flags)
902 {
903         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
904                 flags |= RT6_LOOKUP_F_IFACE;
905
906         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
907 }
908
909 void ip6_route_input(struct sk_buff *skb)
910 {
911         const struct ipv6hdr *iph = ipv6_hdr(skb);
912         struct net *net = dev_net(skb->dev);
913         int flags = RT6_LOOKUP_F_HAS_SADDR;
914         struct flowi6 fl6 = {
915                 .flowi6_iif = skb->dev->ifindex,
916                 .daddr = iph->daddr,
917                 .saddr = iph->saddr,
918                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
919                 .flowi6_mark = skb->mark,
920                 .flowi6_proto = iph->nexthdr,
921         };
922
923         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
924 }
925
926 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
927                                              struct flowi6 *fl6, int flags)
928 {
929         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
930 }
931
932 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
933                                     struct flowi6 *fl6)
934 {
935         int flags = 0;
936
937         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
938                 flags |= RT6_LOOKUP_F_IFACE;
939
940         if (!ipv6_addr_any(&fl6->saddr))
941                 flags |= RT6_LOOKUP_F_HAS_SADDR;
942         else if (sk)
943                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
944
945         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
946 }
947
948 EXPORT_SYMBOL(ip6_route_output);
949
950 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
951 {
952         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
953         struct dst_entry *new = NULL;
954
955         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
956         if (rt) {
957                 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
958                 rt6_init_peer(rt, net->ipv6.peers);
959
960                 new = &rt->dst;
961
962                 new->__use = 1;
963                 new->input = dst_discard;
964                 new->output = dst_discard;
965
966                 if (dst_metrics_read_only(&ort->dst))
967                         new->_metrics = ort->dst._metrics;
968                 else
969                         dst_copy_metrics(new, &ort->dst);
970                 rt->rt6i_idev = ort->rt6i_idev;
971                 if (rt->rt6i_idev)
972                         in6_dev_hold(rt->rt6i_idev);
973
974                 rt->rt6i_gateway = ort->rt6i_gateway;
975                 rt->rt6i_flags = ort->rt6i_flags;
976                 rt6_clean_expires(rt);
977                 rt->rt6i_metric = 0;
978
979                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
980 #ifdef CONFIG_IPV6_SUBTREES
981                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
982 #endif
983
984                 dst_free(new);
985         }
986
987         dst_release(dst_orig);
988         return new ? new : ERR_PTR(-ENOMEM);
989 }
990
991 /*
992  *      Destination cache support functions
993  */
994
995 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
996 {
997         struct rt6_info *rt;
998
999         rt = (struct rt6_info *) dst;
1000
1001         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1002                 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1003                         if (!rt6_has_peer(rt))
1004                                 rt6_bind_peer(rt, 0);
1005                         rt->rt6i_peer_genid = rt6_peer_genid();
1006                 }
1007                 return dst;
1008         }
1009         return NULL;
1010 }
1011
1012 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1013 {
1014         struct rt6_info *rt = (struct rt6_info *) dst;
1015
1016         if (rt) {
1017                 if (rt->rt6i_flags & RTF_CACHE) {
1018                         if (rt6_check_expired(rt)) {
1019                                 ip6_del_rt(rt);
1020                                 dst = NULL;
1021                         }
1022                 } else {
1023                         dst_release(dst);
1024                         dst = NULL;
1025                 }
1026         }
1027         return dst;
1028 }
1029
1030 static void ip6_link_failure(struct sk_buff *skb)
1031 {
1032         struct rt6_info *rt;
1033
1034         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1035
1036         rt = (struct rt6_info *) skb_dst(skb);
1037         if (rt) {
1038                 if (rt->rt6i_flags & RTF_CACHE)
1039                         rt6_update_expires(rt, 0);
1040                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1041                         rt->rt6i_node->fn_sernum = -1;
1042         }
1043 }
1044
1045 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1046 {
1047         struct rt6_info *rt6 = (struct rt6_info*)dst;
1048
1049         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1050                 rt6->rt6i_flags |= RTF_MODIFIED;
1051                 if (mtu < IPV6_MIN_MTU) {
1052                         u32 features = dst_metric(dst, RTAX_FEATURES);
1053                         mtu = IPV6_MIN_MTU;
1054                         features |= RTAX_FEATURE_ALLFRAG;
1055                         dst_metric_set(dst, RTAX_FEATURES, features);
1056                 }
1057                 dst_metric_set(dst, RTAX_MTU, mtu);
1058         }
1059 }
1060
1061 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1062 {
1063         struct net_device *dev = dst->dev;
1064         unsigned int mtu = dst_mtu(dst);
1065         struct net *net = dev_net(dev);
1066
1067         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1068
1069         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1070                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1071
1072         /*
1073          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1074          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1075          * IPV6_MAXPLEN is also valid and means: "any MSS,
1076          * rely only on pmtu discovery"
1077          */
1078         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1079                 mtu = IPV6_MAXPLEN;
1080         return mtu;
1081 }
1082
1083 static unsigned int ip6_mtu(const struct dst_entry *dst)
1084 {
1085         struct inet6_dev *idev;
1086         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1087
1088         if (mtu)
1089                 return mtu;
1090
1091         mtu = IPV6_MIN_MTU;
1092
1093         rcu_read_lock();
1094         idev = __in6_dev_get(dst->dev);
1095         if (idev)
1096                 mtu = idev->cnf.mtu6;
1097         rcu_read_unlock();
1098
1099         return mtu;
1100 }
1101
/*
 * Singly linked list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc().  It is walked and pruned by
 * icmp6_dst_gc() and icmp6_clean_all().
 */
1102 static struct dst_entry *icmp6_dst_gc_list;
/* Guards icmp6_dst_gc_list; taken with the _bh lock variants in this file. */
1103 static DEFINE_SPINLOCK(icmp6_dst_lock);
1104
1105 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1106                                   struct neighbour *neigh,
1107                                   struct flowi6 *fl6)
1108 {
1109         struct dst_entry *dst;
1110         struct rt6_info *rt;
1111         struct inet6_dev *idev = in6_dev_get(dev);
1112         struct net *net = dev_net(dev);
1113
1114         if (unlikely(!idev))
1115                 return ERR_PTR(-ENODEV);
1116
1117         rt = ip6_dst_alloc(net, dev, 0);
1118         if (unlikely(!rt)) {
1119                 in6_dev_put(idev);
1120                 dst = ERR_PTR(-ENOMEM);
1121                 goto out;
1122         }
1123
1124         if (neigh)
1125                 neigh_hold(neigh);
1126         else {
1127                 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1128                 if (IS_ERR(neigh)) {
1129                         in6_dev_put(idev);
1130                         dst_free(&rt->dst);
1131                         return ERR_CAST(neigh);
1132                 }
1133         }
1134
1135         rt->dst.flags |= DST_HOST;
1136         rt->dst.output  = ip6_output;
1137         dst_set_neighbour(&rt->dst, neigh);
1138         atomic_set(&rt->dst.__refcnt, 1);
1139         rt->rt6i_dst.addr = fl6->daddr;
1140         rt->rt6i_dst.plen = 128;
1141         rt->rt6i_idev     = idev;
1142         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1143
1144         spin_lock_bh(&icmp6_dst_lock);
1145         rt->dst.next = icmp6_dst_gc_list;
1146         icmp6_dst_gc_list = &rt->dst;
1147         spin_unlock_bh(&icmp6_dst_lock);
1148
1149         fib6_force_start_gc(net);
1150
1151         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1152
1153 out:
1154         return dst;
1155 }
1156
1157 int icmp6_dst_gc(void)
1158 {
1159         struct dst_entry *dst, **pprev;
1160         int more = 0;
1161
1162         spin_lock_bh(&icmp6_dst_lock);
1163         pprev = &icmp6_dst_gc_list;
1164
1165         while ((dst = *pprev) != NULL) {
1166                 if (!atomic_read(&dst->__refcnt)) {
1167                         *pprev = dst->next;
1168                         dst_free(dst);
1169                 } else {
1170                         pprev = &dst->next;
1171                         ++more;
1172                 }
1173         }
1174
1175         spin_unlock_bh(&icmp6_dst_lock);
1176
1177         return more;
1178 }
1179
1180 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1181                             void *arg)
1182 {
1183         struct dst_entry *dst, **pprev;
1184
1185         spin_lock_bh(&icmp6_dst_lock);
1186         pprev = &icmp6_dst_gc_list;
1187         while ((dst = *pprev) != NULL) {
1188                 struct rt6_info *rt = (struct rt6_info *) dst;
1189                 if (func(rt, arg)) {
1190                         *pprev = dst->next;
1191                         dst_free(dst);
1192                 } else {
1193                         pprev = &dst->next;
1194                 }
1195         }
1196         spin_unlock_bh(&icmp6_dst_lock);
1197 }
1198
1199 static int ip6_dst_gc(struct dst_ops *ops)
1200 {
1201         unsigned long now = jiffies;
1202         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1203         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1204         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1205         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1206         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1207         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1208         int entries;
1209
1210         entries = dst_entries_get_fast(ops);
1211         if (time_after(rt_last_gc + rt_min_interval, now) &&
1212             entries <= rt_max_size)
1213                 goto out;
1214
1215         net->ipv6.ip6_rt_gc_expire++;
1216         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1217         net->ipv6.ip6_rt_last_gc = now;
1218         entries = dst_entries_get_slow(ops);
1219         if (entries < ops->gc_thresh)
1220                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1221 out:
1222         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1223         return entries > rt_max_size;
1224 }
1225
1226 /* Clean host part of a prefix. Not necessary in radix tree,
1227    but results in cleaner routing tables.
1228
1229    Remove it only when all the things will work!
1230  */
1231
1232 int ip6_dst_hoplimit(struct dst_entry *dst)
1233 {
1234         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1235         if (hoplimit == 0) {
1236                 struct net_device *dev = dst->dev;
1237                 struct inet6_dev *idev;
1238
1239                 rcu_read_lock();
1240                 idev = __in6_dev_get(dev);
1241                 if (idev)
1242                         hoplimit = idev->cnf.hop_limit;
1243                 else
1244                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1245                 rcu_read_unlock();
1246         }
1247         return hoplimit;
1248 }
1249 EXPORT_SYMBOL(ip6_dst_hoplimit);
1250
1251 /*
1252  *
1253  */
1254
/*
 * Create a route from the configuration in @cfg and insert it into the
 * routing table selected by cfg->fc_table.
 *
 * Side effects on @cfg: a zero fc_metric is replaced by IP6_RT_PRIO_USER,
 * an RTPROT_UNSPEC fc_protocol by RTPROT_BOOT, and fc_nlinfo.nl_net is
 * overwritten with the namespace of the final device before insertion.
 *
 * Returns the result of __ip6_ins_rt() on the success path, or a negative
 * errno set in this function:
 *   -EINVAL       bad prefix length, source prefix without
 *                 CONFIG_IPV6_SUBTREES, non-unicast gateway, gateway route
 *                 with no device or a loopback device, preferred source
 *                 not accepted by ipv6_chk_addr(), metric type above
 *                 RTAX_MAX
 *   -ENODEV       no such interface / no inet6_dev / no device resolved
 *   -ENOBUFS      routing table could not be obtained
 *   -ENOMEM       route or metrics allocation failed
 *   -EHOSTUNREACH non-link-local gateway is not reachable through a
 *                 non-gateway route on the expected device
 * or the error returned by rt6_bind_neighbour().
 *
 * On every failure path ("out") the device and inet6_dev references taken
 * here are dropped and the partially built route is freed.  On the
 * success path those references are not dropped; the pointers are stored
 * in the route.
 */
1255 int ip6_route_add(struct fib6_config *cfg)
1256 {
1257         int err;
1258         struct net *net = cfg->fc_nlinfo.nl_net;
1259         struct rt6_info *rt = NULL;
1260         struct net_device *dev = NULL;
1261         struct inet6_dev *idev = NULL;
1262         struct fib6_table *table;
1263         int addr_type;
1264
1265         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1266                 return -EINVAL;
1267 #ifndef CONFIG_IPV6_SUBTREES
1268         if (cfg->fc_src_len)
1269                 return -EINVAL;
1270 #endif
        /* Explicit interface: take references on the device and its inet6_dev. */
1271         if (cfg->fc_ifindex) {
1272                 err = -ENODEV;
1273                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1274                 if (!dev)
1275                         goto out;
1276                 idev = in6_dev_get(dev);
1277                 if (!idev)
1278                         goto out;
1279         }
1280
1281         if (cfg->fc_metric == 0)
1282                 cfg->fc_metric = IP6_RT_PRIO_USER;
1283
        /*
         * A netlink request without NLM_F_CREATE first tries to use an
         * existing table; if there is none it warns and creates the table
         * anyway.  All other requests create the table on demand.
         */
1284         err = -ENOBUFS;
1285         if (cfg->fc_nlinfo.nlh &&
1286             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1287                 table = fib6_get_table(net, cfg->fc_table);
1288                 if (!table) {
1289                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1290                         table = fib6_new_table(net, cfg->fc_table);
1291                 }
1292         } else {
1293                 table = fib6_new_table(net, cfg->fc_table);
1294         }
1295
1296         if (!table)
1297                 goto out;
1298
1299         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT);
1300
1301         if (!rt) {
1302                 err = -ENOMEM;
1303                 goto out;
1304         }
1305
1306         rt->dst.obsolete = -1;
1307
1308         if (cfg->fc_flags & RTF_EXPIRES)
1309                 rt6_set_expires(rt, jiffies +
1310                                 clock_t_to_jiffies(cfg->fc_expires));
1311         else
1312                 rt6_clean_expires(rt);
1313
1314         if (cfg->fc_protocol == RTPROT_UNSPEC)
1315                 cfg->fc_protocol = RTPROT_BOOT;
1316         rt->rt6i_protocol = cfg->fc_protocol;
1317
1318         addr_type = ipv6_addr_type(&cfg->fc_dst);
1319
        /* Input handler: multicast, local delivery, or forwarding. */
1320         if (addr_type & IPV6_ADDR_MULTICAST)
1321                 rt->dst.input = ip6_mc_input;
1322         else if (cfg->fc_flags & RTF_LOCAL)
1323                 rt->dst.input = ip6_input;
1324         else
1325                 rt->dst.input = ip6_forward;
1326
1327         rt->dst.output = ip6_output;
1328
1329         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1330         rt->rt6i_dst.plen = cfg->fc_dst_len;
1331         if (rt->rt6i_dst.plen == 128)
1332                rt->dst.flags |= DST_HOST;
1333
        /* Non-host routes with metrics supplied get their own zeroed metrics array. */
1334         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1335                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1336                 if (!metrics) {
1337                         err = -ENOMEM;
1338                         goto out;
1339                 }
1340                 dst_init_metrics(&rt->dst, metrics, 0);
1341         }
1342 #ifdef CONFIG_IPV6_SUBTREES
1343         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1344         rt->rt6i_src.plen = cfg->fc_src_len;
1345 #endif
1346
1347         rt->rt6i_metric = cfg->fc_metric;
1348
1349         /* We cannot add true routes via loopback here,
1350            they would result in kernel looping; promote them to reject routes
1351          */
1352         if ((cfg->fc_flags & RTF_REJECT) ||
1353             (dev && (dev->flags & IFF_LOOPBACK) &&
1354              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1355              !(cfg->fc_flags & RTF_LOCAL))) {
1356                 /* hold loopback dev/idev if we haven't done so. */
1357                 if (dev != net->loopback_dev) {
                        /* swap the references held on the requested device for loopback ones */
1358                         if (dev) {
1359                                 dev_put(dev);
1360                                 in6_dev_put(idev);
1361                         }
1362                         dev = net->loopback_dev;
1363                         dev_hold(dev);
1364                         idev = in6_dev_get(dev);
1365                         if (!idev) {
1366                                 err = -ENODEV;
1367                                 goto out;
1368                         }
1369                 }
1370                 rt->dst.output = ip6_pkt_discard_out;
1371                 rt->dst.input = ip6_pkt_discard;
1372                 rt->dst.error = -ENETUNREACH;
1373                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1374                 goto install_route;
1375         }
1376
1377         if (cfg->fc_flags & RTF_GATEWAY) {
1378                 const struct in6_addr *gw_addr;
1379                 int gwa_type;
1380
1381                 gw_addr = &cfg->fc_gateway;
1382                 rt->rt6i_gateway = *gw_addr;
1383                 gwa_type = ipv6_addr_type(gw_addr);
1384
1385                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1386                         struct rt6_info *grt;
1387
1388                         /* IPv6 strictly inhibits using not link-local
1389                            addresses as nexthop address.
1390                            Otherwise, router will not able to send redirects.
1391                            It is very good, but in some (rare!) circumstances
1392                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1393                            some exceptions. --ANK
1394                          */
1395                         err = -EINVAL;
1396                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1397                                 goto out;
1398
                        /* look up the existing route towards the gateway itself */
1399                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1400
1401                         err = -EHOSTUNREACH;
1402                         if (!grt)
1403                                 goto out;
1404                         if (dev) {
1405                                 if (dev != grt->dst.dev) {
1406                                         dst_release(&grt->dst);
1407                                         goto out;
1408                                 }
1409                         } else {
                                /* no interface given: adopt the one of the route to the gateway */
1410                                 dev = grt->dst.dev;
1411                                 idev = grt->rt6i_idev;
1412                                 dev_hold(dev);
1413                                 in6_dev_hold(grt->rt6i_idev);
1414                         }
                        /* err stays -EHOSTUNREACH unless the gateway is reached via a non-gateway route */
1415                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1416                                 err = 0;
1417                         dst_release(&grt->dst);
1418
1419                         if (err)
1420                                 goto out;
1421                 }
1422                 err = -EINVAL;
1423                 if (!dev || (dev->flags & IFF_LOOPBACK))
1424                         goto out;
1425         }
1426
1427         err = -ENODEV;
1428         if (!dev)
1429                 goto out;
1430
1431         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1432                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1433                         err = -EINVAL;
1434                         goto out;
1435                 }
1436                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1437                 rt->rt6i_prefsrc.plen = 128;
1438         } else
1439                 rt->rt6i_prefsrc.plen = 0;
1440
1441         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1442                 err = rt6_bind_neighbour(rt, dev);
1443                 if (err)
1444                         goto out;
1445         }
1446
1447         rt->rt6i_flags = cfg->fc_flags;
1448
1449 install_route:
        /*
         * Apply the metrics from the netlink attribute stream: attributes
         * of type 0 are skipped, a type above RTAX_MAX is rejected.
         */
1450         if (cfg->fc_mx) {
1451                 struct nlattr *nla;
1452                 int remaining;
1453
1454                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1455                         int type = nla_type(nla);
1456
1457                         if (type) {
1458                                 if (type > RTAX_MAX) {
1459                                         err = -EINVAL;
1460                                         goto out;
1461                                 }
1462
1463                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1464                         }
1465                 }
1466         }
1467
        /* The dev/idev references held above are kept and stored in the route. */
1468         rt->dst.dev = dev;
1469         rt->rt6i_idev = idev;
1470         rt->rt6i_table = table;
1471
1472         cfg->fc_nlinfo.nl_net = dev_net(dev);
1473
1474         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1475
        /* Failure: drop whatever references were taken and free the route. */
1476 out:
1477         if (dev)
1478                 dev_put(dev);
1479         if (idev)
1480                 in6_dev_put(idev);
1481         if (rt)
1482                 dst_free(&rt->dst);
1483         return err;
1484 }
1485
1486 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1487 {
1488         int err;
1489         struct fib6_table *table;
1490         struct net *net = dev_net(rt->dst.dev);
1491
1492         if (rt == net->ipv6.ip6_null_entry)
1493                 return -ENOENT;
1494
1495         table = rt->rt6i_table;
1496         write_lock_bh(&table->tb6_lock);
1497
1498         err = fib6_del(rt, info);
1499         dst_release(&rt->dst);
1500
1501         write_unlock_bh(&table->tb6_lock);
1502
1503         return err;
1504 }
1505
1506 int ip6_del_rt(struct rt6_info *rt)
1507 {
1508         struct nl_info info = {
1509                 .nl_net = dev_net(rt->dst.dev),
1510         };
1511         return __ip6_del_rt(rt, &info);
1512 }
1513
1514 static int ip6_route_del(struct fib6_config *cfg)
1515 {
1516         struct fib6_table *table;
1517         struct fib6_node *fn;
1518         struct rt6_info *rt;
1519         int err = -ESRCH;
1520
1521         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1522         if (!table)
1523                 return err;
1524
1525         read_lock_bh(&table->tb6_lock);
1526
1527         fn = fib6_locate(&table->tb6_root,
1528                          &cfg->fc_dst, cfg->fc_dst_len,
1529                          &cfg->fc_src, cfg->fc_src_len);
1530
1531         if (fn) {
1532                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1533                         if (cfg->fc_ifindex &&
1534                             (!rt->dst.dev ||
1535                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1536                                 continue;
1537                         if (cfg->fc_flags & RTF_GATEWAY &&
1538                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1539                                 continue;
1540                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1541                                 continue;
1542                         dst_hold(&rt->dst);
1543                         read_unlock_bh(&table->tb6_lock);
1544
1545                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1546                 }
1547         }
1548         read_unlock_bh(&table->tb6_lock);
1549
1550         return err;
1551 }
1552
1553 /*
1554  *      Handle redirects
1555  */
/*
 * Flow key used for redirect lookups: a regular flowi6 plus the gateway
 * address to match.  fl6 must remain the first member, because
 * __ip6_route_redirect() receives a pointer to fl6 and casts it back to
 * struct ip6rd_flowi.
 */
1556 struct ip6rd_flowi {
1557         struct flowi6 fl6;
1558         struct in6_addr gateway;
1559 };
1560
1561 static struct rt6_info *__ip6_route_redirect(struct net *net,
1562                                              struct fib6_table *table,
1563                                              struct flowi6 *fl6,
1564                                              int flags)
1565 {
1566         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1567         struct rt6_info *rt;
1568         struct fib6_node *fn;
1569
1570         /*
1571          * Get the "current" route for this destination and
1572          * check if the redirect has come from approriate router.
1573          *
1574          * RFC 2461 specifies that redirects should only be
1575          * accepted if they come from the nexthop to the target.
1576          * Due to the way the routes are chosen, this notion
1577          * is a bit fuzzy and one might need to check all possible
1578          * routes.
1579          */
1580
1581         read_lock_bh(&table->tb6_lock);
1582         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1583 restart:
1584         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1585                 /*
1586                  * Current route is on-link; redirect is always invalid.
1587                  *
1588                  * Seems, previous statement is not true. It could
1589                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1590                  * But then router serving it might decide, that we should
1591                  * know truth 8)8) --ANK (980726).
1592                  */
1593                 if (rt6_check_expired(rt))
1594                         continue;
1595                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1596                         continue;
1597                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1598                         continue;
1599                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1600                         continue;
1601                 break;
1602         }
1603
1604         if (!rt)
1605                 rt = net->ipv6.ip6_null_entry;
1606         BACKTRACK(net, &fl6->saddr);
1607 out:
1608         dst_hold(&rt->dst);
1609
1610         read_unlock_bh(&table->tb6_lock);
1611
1612         return rt;
1613 };
1614
1615 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1616                                            const struct in6_addr *src,
1617                                            const struct in6_addr *gateway,
1618                                            struct net_device *dev)
1619 {
1620         int flags = RT6_LOOKUP_F_HAS_SADDR;
1621         struct net *net = dev_net(dev);
1622         struct ip6rd_flowi rdfl = {
1623                 .fl6 = {
1624                         .flowi6_oif = dev->ifindex,
1625                         .daddr = *dest,
1626                         .saddr = *src,
1627                 },
1628         };
1629
1630         rdfl.gateway = *gateway;
1631
1632         if (rt6_need_strict(dest))
1633                 flags |= RT6_LOOKUP_F_IFACE;
1634
1635         return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1636                                                    flags, __ip6_route_redirect);
1637 }
1638
/*
 * Process a redirect for destination @dest.
 *
 * @dest:    destination being redirected
 * @src:     source address used for the route lookup
 * @saddr:   passed to ip6_route_redirect() as the gateway that the
 *           current route must be using for the redirect to be accepted
 * @neigh:   neighbour entry of the new next hop; its device selects the
 *           namespace and the interface for the lookup
 * @lladdr:  link-layer address handed to neigh_update()
 * @on_link: non-zero when the new next hop is the destination itself;
 *           the router flags are then not forced on the neighbour and
 *           RTF_GATEWAY is cleared on the new route
 *
 * If the lookup yields the null entry the redirect is ignored.
 * Otherwise the neighbour is updated, and unless the route already uses
 * @neigh, a new RTF_CACHE|RTF_DYNAMIC copy of the route pointing at
 * @neigh is inserted and a NETEVENT_REDIRECT notification is raised.
 */
1639 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1640                   const struct in6_addr *saddr,
1641                   struct neighbour *neigh, u8 *lladdr, int on_link)
1642 {
1643         struct rt6_info *rt, *nrt = NULL;
1644         struct netevent_redirect netevent;
1645         struct net *net = dev_net(neigh->dev);
1646
1647         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1648
1649         if (rt == net->ipv6.ip6_null_entry) {
1650                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1651                 goto out;
1652         }
1653
1654         /*
1655          *      We have finally decided to accept it.
1656          */
1657
1658         neigh_update(neigh, lladdr, NUD_STALE,
1659                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1660                      NEIGH_UPDATE_F_OVERRIDE|
1661                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1662                                      NEIGH_UPDATE_F_ISROUTER))
1663                      );
1664
1665         /*
1666          * Redirect received -> path was valid.
1667          * Look, redirects are sent only in response to data packets,
1668          * so that this nexthop apparently is reachable. --ANK
1669          */
1670         dst_confirm(&rt->dst);
1671
1672         /* Duplicate redirect: silently ignore. */
1673         if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1674                 goto out;
1675
1676         nrt = ip6_rt_copy(rt, dest);
1677         if (!nrt)
1678                 goto out;
1679
1680         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1681         if (on_link)
1682                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1683
        /* the new route's gateway is the neighbour's own address */
1684         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1685         dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1686
1687         if (ip6_ins_rt(nrt))
1688                 goto out;
1689
1690         netevent.old = &rt->dst;
1691         netevent.new = &nrt->dst;
1692         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1693
        /*
         * An old cache route is replaced by the new one.  ip6_del_rt()
         * releases a reference on rt itself (see __ip6_del_rt()), so the
         * dst_release() at "out" is skipped on this path.
         */
1694         if (rt->rt6i_flags & RTF_CACHE) {
1695                 ip6_del_rt(rt);
1696                 return;
1697         }
1698
1699 out:
1700         dst_release(&rt->dst);
1701 }
1702
1703 /*
1704  *      Handle ICMP "packet too big" messages
1705  *      i.e. Path MTU discovery
1706  */
1707
/*
 * Apply a newly learned path MTU @pmtu to the route for @daddr/@saddr
 * looked up in @net with interface index @ifindex.
 *
 * Nothing is changed when no route is found or when @pmtu is not smaller
 * than the route's current MTU.  A @pmtu below IPV6_MIN_MTU is clamped to
 * IPV6_MIN_MTU and RTAX_FEATURE_ALLFRAG is set on the affected route.
 *
 * An existing RTF_CACHE route is updated in place; otherwise a new
 * per-destination route is created (COW or clone), given the reduced MTU
 * and an expiry of ip6_rt_mtu_expires, and inserted.
 */
1708 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1709                              struct net *net, u32 pmtu, int ifindex)
1710 {
1711         struct rt6_info *rt, *nrt;
1712         int allfrag = 0;
1713 again:
1714         rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1715         if (!rt)
1716                 return;
1717
        /* an expired route is deleted and the lookup is repeated */
1718         if (rt6_check_expired(rt)) {
1719                 ip6_del_rt(rt);
1720                 goto again;
1721         }
1722
1723         if (pmtu >= dst_mtu(&rt->dst))
1724                 goto out;
1725
1726         if (pmtu < IPV6_MIN_MTU) {
1727                 /*
1728                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1729                  * MTU (1280) and a fragment header should always be included
1730                  * after a node receiving Too Big message reporting PMTU is
1731                  * less than the IPv6 Minimum Link MTU.
1732                  */
1733                 pmtu = IPV6_MIN_MTU;
1734                 allfrag = 1;
1735         }
1736
1737         /* New mtu received -> path was valid.
1738            They are sent only in response to data packets,
1739            so that this nexthop apparently is reachable. --ANK
1740          */
1741         dst_confirm(&rt->dst);
1742
1743         /* Host route. If it is static, it would be better
1744            not to override it, but add new one, so that
1745            when cache entry will expire old pmtu
1746            would return automatically.
1747          */
1748         if (rt->rt6i_flags & RTF_CACHE) {
1749                 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1750                 if (allfrag) {
1751                         u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1752                         features |= RTAX_FEATURE_ALLFRAG;
1753                         dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1754                 }
1755                 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1756                 rt->rt6i_flags |= RTF_MODIFIED;
1757                 goto out;
1758         }
1759
1760         /* Network route.
1761            Two cases are possible:
1762            1. It is connected route. Action: COW
1763            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1764          */
1765         if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1766                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1767         else
1768                 nrt = rt6_alloc_clone(rt, daddr);
1769
1770         if (nrt) {
1771                 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1772                 if (allfrag) {
1773                         u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1774                         features |= RTAX_FEATURE_ALLFRAG;
1775                         dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1776                 }
1777
1778                 /* According to RFC 1981, a PMTU increase should not be
1779                  * probed for within 5 minutes; the recommended timer is
1780                  * 10 minutes.  The route expiry is therefore set to
1781                  * ip6_rt_mtu_expires (10 minutes); once it has passed, the
1782                  * reduced pmtu expires and an increase can be detected again.
1783                  */
1784                 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1785                 nrt->rt6i_flags |= RTF_DYNAMIC;
1786                 ip6_ins_rt(nrt);
1787         }
1788 out:
1789         dst_release(&rt->dst);
1790 }
1791
1792 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1793                         struct net_device *dev, u32 pmtu)
1794 {
1795         struct net *net = dev_net(dev);
1796
1797         /*
1798          * RFC 1981 states that a node "MUST reduce the size of the packets it
1799          * is sending along the path" that caused the Packet Too Big message.
1800          * Since it's not possible in the general case to determine which
1801          * interface was used to send the original packet, we update the MTU
1802          * on the interface that will be used to send future packets. We also
1803          * update the MTU on the interface that received the Packet Too Big in
1804          * case the original packet was forced out that interface with
1805          * SO_BINDTODEVICE or similar. This is the next best thing to the
1806          * correct behaviour, which would be to update the MTU on all
1807          * interfaces.
1808          */
1809         rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1810         rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1811 }
1812
1813 /*
1814  *      Misc support functions
1815  */
1816
1817 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1818                                     const struct in6_addr *dest)
1819 {
1820         struct net *net = dev_net(ort->dst.dev);
1821         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0);
1822
1823         if (rt) {
1824                 rt->dst.input = ort->dst.input;
1825                 rt->dst.output = ort->dst.output;
1826                 rt->dst.flags |= DST_HOST;
1827
1828                 rt->rt6i_dst.addr = *dest;
1829                 rt->rt6i_dst.plen = 128;
1830                 dst_copy_metrics(&rt->dst, &ort->dst);
1831                 rt->dst.error = ort->dst.error;
1832                 rt->rt6i_idev = ort->rt6i_idev;
1833                 if (rt->rt6i_idev)
1834                         in6_dev_hold(rt->rt6i_idev);
1835                 rt->dst.lastuse = jiffies;
1836
1837                 rt->rt6i_gateway = ort->rt6i_gateway;
1838                 rt->rt6i_flags = ort->rt6i_flags;
1839                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1840                     (RTF_DEFAULT | RTF_ADDRCONF))
1841                         rt6_set_from(rt, ort);
1842                 else
1843                         rt6_clean_expires(rt);
1844                 rt->rt6i_metric = 0;
1845
1846 #ifdef CONFIG_IPV6_SUBTREES
1847                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1848 #endif
1849                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1850                 rt->rt6i_table = ort->rt6i_table;
1851         }
1852         return rt;
1853 }
1854
1855 #ifdef CONFIG_IPV6_ROUTE_INFO
1856 static struct rt6_info *rt6_get_route_info(struct net *net,
1857                                            const struct in6_addr *prefix, int prefixlen,
1858                                            const struct in6_addr *gwaddr, int ifindex)
1859 {
1860         struct fib6_node *fn;
1861         struct rt6_info *rt = NULL;
1862         struct fib6_table *table;
1863
1864         table = fib6_get_table(net, RT6_TABLE_INFO);
1865         if (!table)
1866                 return NULL;
1867
1868         write_lock_bh(&table->tb6_lock);
1869         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1870         if (!fn)
1871                 goto out;
1872
1873         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1874                 if (rt->dst.dev->ifindex != ifindex)
1875                         continue;
1876                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1877                         continue;
1878                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1879                         continue;
1880                 dst_hold(&rt->dst);
1881                 break;
1882         }
1883 out:
1884         write_unlock_bh(&table->tb6_lock);
1885         return rt;
1886 }
1887
1888 static struct rt6_info *rt6_add_route_info(struct net *net,
1889                                            const struct in6_addr *prefix, int prefixlen,
1890                                            const struct in6_addr *gwaddr, int ifindex,
1891                                            unsigned int pref)
1892 {
1893         struct fib6_config cfg = {
1894                 .fc_table       = RT6_TABLE_INFO,
1895                 .fc_metric      = IP6_RT_PRIO_USER,
1896                 .fc_ifindex     = ifindex,
1897                 .fc_dst_len     = prefixlen,
1898                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1899                                   RTF_UP | RTF_PREF(pref),
1900                 .fc_nlinfo.pid = 0,
1901                 .fc_nlinfo.nlh = NULL,
1902                 .fc_nlinfo.nl_net = net,
1903         };
1904
1905         cfg.fc_dst = *prefix;
1906         cfg.fc_gateway = *gwaddr;
1907
1908         /* We should treat it as a default route if prefix length is 0. */
1909         if (!prefixlen)
1910                 cfg.fc_flags |= RTF_DEFAULT;
1911
1912         ip6_route_add(&cfg);
1913
1914         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1915 }
1916 #endif
1917
1918 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1919 {
1920         struct rt6_info *rt;
1921         struct fib6_table *table;
1922
1923         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1924         if (!table)
1925                 return NULL;
1926
1927         write_lock_bh(&table->tb6_lock);
1928         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1929                 if (dev == rt->dst.dev &&
1930                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1931                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1932                         break;
1933         }
1934         if (rt)
1935                 dst_hold(&rt->dst);
1936         write_unlock_bh(&table->tb6_lock);
1937         return rt;
1938 }
1939
1940 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1941                                      struct net_device *dev,
1942                                      unsigned int pref)
1943 {
1944         struct fib6_config cfg = {
1945                 .fc_table       = RT6_TABLE_DFLT,
1946                 .fc_metric      = IP6_RT_PRIO_USER,
1947                 .fc_ifindex     = dev->ifindex,
1948                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1949                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1950                 .fc_nlinfo.pid = 0,
1951                 .fc_nlinfo.nlh = NULL,
1952                 .fc_nlinfo.nl_net = dev_net(dev),
1953         };
1954
1955         cfg.fc_gateway = *gwaddr;
1956
1957         ip6_route_add(&cfg);
1958
1959         return rt6_get_dflt_router(gwaddr, dev);
1960 }
1961
1962 void rt6_purge_dflt_routers(struct net *net)
1963 {
1964         struct rt6_info *rt;
1965         struct fib6_table *table;
1966
1967         /* NOTE: Keep consistent with rt6_get_dflt_router */
1968         table = fib6_get_table(net, RT6_TABLE_DFLT);
1969         if (!table)
1970                 return;
1971
1972 restart:
1973         read_lock_bh(&table->tb6_lock);
1974         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1975                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1976                         dst_hold(&rt->dst);
1977                         read_unlock_bh(&table->tb6_lock);
1978                         ip6_del_rt(rt);
1979                         goto restart;
1980                 }
1981         }
1982         read_unlock_bh(&table->tb6_lock);
1983 }
1984
1985 static void rtmsg_to_fib6_config(struct net *net,
1986                                  struct in6_rtmsg *rtmsg,
1987                                  struct fib6_config *cfg)
1988 {
1989         memset(cfg, 0, sizeof(*cfg));
1990
1991         cfg->fc_table = RT6_TABLE_MAIN;
1992         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1993         cfg->fc_metric = rtmsg->rtmsg_metric;
1994         cfg->fc_expires = rtmsg->rtmsg_info;
1995         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1996         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1997         cfg->fc_flags = rtmsg->rtmsg_flags;
1998
1999         cfg->fc_nlinfo.nl_net = net;
2000
2001         cfg->fc_dst = rtmsg->rtmsg_dst;
2002         cfg->fc_src = rtmsg->rtmsg_src;
2003         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2004 }
2005
2006 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2007 {
2008         struct fib6_config cfg;
2009         struct in6_rtmsg rtmsg;
2010         int err;
2011
2012         switch(cmd) {
2013         case SIOCADDRT:         /* Add a route */
2014         case SIOCDELRT:         /* Delete a route */
2015                 if (!capable(CAP_NET_ADMIN))
2016                         return -EPERM;
2017                 err = copy_from_user(&rtmsg, arg,
2018                                      sizeof(struct in6_rtmsg));
2019                 if (err)
2020                         return -EFAULT;
2021
2022                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2023
2024                 rtnl_lock();
2025                 switch (cmd) {
2026                 case SIOCADDRT:
2027                         err = ip6_route_add(&cfg);
2028                         break;
2029                 case SIOCDELRT:
2030                         err = ip6_route_del(&cfg);
2031                         break;
2032                 default:
2033                         err = -EINVAL;
2034                 }
2035                 rtnl_unlock();
2036
2037                 return err;
2038         }
2039
2040         return -EINVAL;
2041 }
2042
2043 /*
2044  *      Drop the packet on the floor
2045  */
2046
2047 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2048 {
2049         int type;
2050         struct dst_entry *dst = skb_dst(skb);
2051         switch (ipstats_mib_noroutes) {
2052         case IPSTATS_MIB_INNOROUTES:
2053                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2054                 if (type == IPV6_ADDR_ANY) {
2055                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2056                                       IPSTATS_MIB_INADDRERRORS);
2057                         break;
2058                 }
2059                 /* FALLTHROUGH */
2060         case IPSTATS_MIB_OUTNOROUTES:
2061                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2062                               ipstats_mib_noroutes);
2063                 break;
2064         }
2065         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2066         kfree_skb(skb);
2067         return 0;
2068 }
2069
2070 static int ip6_pkt_discard(struct sk_buff *skb)
2071 {
2072         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2073 }
2074
2075 static int ip6_pkt_discard_out(struct sk_buff *skb)
2076 {
2077         skb->dev = skb_dst(skb)->dev;
2078         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2079 }
2080
2081 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2082
2083 static int ip6_pkt_prohibit(struct sk_buff *skb)
2084 {
2085         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2086 }
2087
2088 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2089 {
2090         skb->dev = skb_dst(skb)->dev;
2091         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2092 }
2093
2094 #endif
2095
2096 /*
2097  *      Allocate a dst for local (unicast / anycast) address.
2098  */
2099
2100 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2101                                     const struct in6_addr *addr,
2102                                     bool anycast)
2103 {
2104         struct net *net = dev_net(idev->dev);
2105         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0);
2106         int err;
2107
2108         if (!rt) {
2109                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2110                 return ERR_PTR(-ENOMEM);
2111         }
2112
2113         in6_dev_hold(idev);
2114
2115         rt->dst.flags |= DST_HOST;
2116         rt->dst.input = ip6_input;
2117         rt->dst.output = ip6_output;
2118         rt->rt6i_idev = idev;
2119         rt->dst.obsolete = -1;
2120
2121         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2122         if (anycast)
2123                 rt->rt6i_flags |= RTF_ANYCAST;
2124         else
2125                 rt->rt6i_flags |= RTF_LOCAL;
2126         err = rt6_bind_neighbour(rt, rt->dst.dev);
2127         if (err) {
2128                 dst_free(&rt->dst);
2129                 return ERR_PTR(err);
2130         }
2131
2132         rt->rt6i_dst.addr = *addr;
2133         rt->rt6i_dst.plen = 128;
2134         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2135
2136         atomic_set(&rt->dst.__refcnt, 1);
2137
2138         return rt;
2139 }
2140
2141 int ip6_route_get_saddr(struct net *net,
2142                         struct rt6_info *rt,
2143                         const struct in6_addr *daddr,
2144                         unsigned int prefs,
2145                         struct in6_addr *saddr)
2146 {
2147         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2148         int err = 0;
2149         if (rt->rt6i_prefsrc.plen)
2150                 *saddr = rt->rt6i_prefsrc.addr;
2151         else
2152                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2153                                          daddr, prefs, saddr);
2154         return err;
2155 }
2156
2157 /* remove deleted ip from prefsrc entries */
2158 struct arg_dev_net_ip {
2159         struct net_device *dev;
2160         struct net *net;
2161         struct in6_addr *addr;
2162 };
2163
2164 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2165 {
2166         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2167         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2168         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2169
2170         if (((void *)rt->dst.dev == dev || !dev) &&
2171             rt != net->ipv6.ip6_null_entry &&
2172             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2173                 /* remove prefsrc entry */
2174                 rt->rt6i_prefsrc.plen = 0;
2175         }
2176         return 0;
2177 }
2178
2179 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2180 {
2181         struct net *net = dev_net(ifp->idev->dev);
2182         struct arg_dev_net_ip adni = {
2183                 .dev = ifp->idev->dev,
2184                 .net = net,
2185                 .addr = &ifp->addr,
2186         };
2187         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2188 }
2189
2190 struct arg_dev_net {
2191         struct net_device *dev;
2192         struct net *net;
2193 };
2194
2195 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2196 {
2197         const struct arg_dev_net *adn = arg;
2198         const struct net_device *dev = adn->dev;
2199
2200         if ((rt->dst.dev == dev || !dev) &&
2201             rt != adn->net->ipv6.ip6_null_entry)
2202                 return -1;
2203
2204         return 0;
2205 }
2206
2207 void rt6_ifdown(struct net *net, struct net_device *dev)
2208 {
2209         struct arg_dev_net adn = {
2210                 .dev = dev,
2211                 .net = net,
2212         };
2213
2214         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2215         icmp6_clean_all(fib6_ifdown, &adn);
2216 }
2217
2218 struct rt6_mtu_change_arg {
2219         struct net_device *dev;
2220         unsigned int mtu;
2221 };
2222
2223 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2224 {
2225         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2226         struct inet6_dev *idev;
2227
2228         /* In IPv6 pmtu discovery is not optional,
2229            so that RTAX_MTU lock cannot disable it.
2230            We still use this lock to block changes
2231            caused by addrconf/ndisc.
2232         */
2233
2234         idev = __in6_dev_get(arg->dev);
2235         if (!idev)
2236                 return 0;
2237
2238         /* For administrative MTU increase, there is no way to discover
2239            IPv6 PMTU increase, so PMTU increase should be updated here.
2240            Since RFC 1981 doesn't include administrative MTU increase
2241            update PMTU increase is a MUST. (i.e. jumbo frame)
2242          */
2243         /*
2244            If new MTU is less than route PMTU, this new MTU will be the
2245            lowest MTU in the path, update the route PMTU to reflect PMTU
2246            decreases; if new MTU is greater than route PMTU, and the
2247            old MTU is the lowest MTU in the path, update the route PMTU
2248            to reflect the increase. In this case if the other nodes' MTU
2249            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2250            PMTU discouvery.
2251          */
2252         if (rt->dst.dev == arg->dev &&
2253             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2254             (dst_mtu(&rt->dst) >= arg->mtu ||
2255              (dst_mtu(&rt->dst) < arg->mtu &&
2256               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2257                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2258         }
2259         return 0;
2260 }
2261
2262 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2263 {
2264         struct rt6_mtu_change_arg arg = {
2265                 .dev = dev,
2266                 .mtu = mtu,
2267         };
2268
2269         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2270 }
2271
2272 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2273         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2274         [RTA_OIF]               = { .type = NLA_U32 },
2275         [RTA_IIF]               = { .type = NLA_U32 },
2276         [RTA_PRIORITY]          = { .type = NLA_U32 },
2277         [RTA_METRICS]           = { .type = NLA_NESTED },
2278 };
2279
2280 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2281                               struct fib6_config *cfg)
2282 {
2283         struct rtmsg *rtm;
2284         struct nlattr *tb[RTA_MAX+1];
2285         int err;
2286
2287         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2288         if (err < 0)
2289                 goto errout;
2290
2291         err = -EINVAL;
2292         rtm = nlmsg_data(nlh);
2293         memset(cfg, 0, sizeof(*cfg));
2294
2295         cfg->fc_table = rtm->rtm_table;
2296         cfg->fc_dst_len = rtm->rtm_dst_len;
2297         cfg->fc_src_len = rtm->rtm_src_len;
2298         cfg->fc_flags = RTF_UP;
2299         cfg->fc_protocol = rtm->rtm_protocol;
2300
2301         if (rtm->rtm_type == RTN_UNREACHABLE)
2302                 cfg->fc_flags |= RTF_REJECT;
2303
2304         if (rtm->rtm_type == RTN_LOCAL)
2305                 cfg->fc_flags |= RTF_LOCAL;
2306
2307         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2308         cfg->fc_nlinfo.nlh = nlh;
2309         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2310
2311         if (tb[RTA_GATEWAY]) {
2312                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2313                 cfg->fc_flags |= RTF_GATEWAY;
2314         }
2315
2316         if (tb[RTA_DST]) {
2317                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2318
2319                 if (nla_len(tb[RTA_DST]) < plen)
2320                         goto errout;
2321
2322                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2323         }
2324
2325         if (tb[RTA_SRC]) {
2326                 int plen = (rtm->rtm_src_len + 7) >> 3;
2327
2328                 if (nla_len(tb[RTA_SRC]) < plen)
2329                         goto errout;
2330
2331                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2332         }
2333
2334         if (tb[RTA_PREFSRC])
2335                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2336
2337         if (tb[RTA_OIF])
2338                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2339
2340         if (tb[RTA_PRIORITY])
2341                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2342
2343         if (tb[RTA_METRICS]) {
2344                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2345                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2346         }
2347
2348         if (tb[RTA_TABLE])
2349                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2350
2351         err = 0;
2352 errout:
2353         return err;
2354 }
2355
2356 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2357 {
2358         struct fib6_config cfg;
2359         int err;
2360
2361         err = rtm_to_fib6_config(skb, nlh, &cfg);
2362         if (err < 0)
2363                 return err;
2364
2365         return ip6_route_del(&cfg);
2366 }
2367
2368 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2369 {
2370         struct fib6_config cfg;
2371         int err;
2372
2373         err = rtm_to_fib6_config(skb, nlh, &cfg);
2374         if (err < 0)
2375                 return err;
2376
2377         return ip6_route_add(&cfg);
2378 }
2379
2380 static inline size_t rt6_nlmsg_size(void)
2381 {
2382         return NLMSG_ALIGN(sizeof(struct rtmsg))
2383                + nla_total_size(16) /* RTA_SRC */
2384                + nla_total_size(16) /* RTA_DST */
2385                + nla_total_size(16) /* RTA_GATEWAY */
2386                + nla_total_size(16) /* RTA_PREFSRC */
2387                + nla_total_size(4) /* RTA_TABLE */
2388                + nla_total_size(4) /* RTA_IIF */
2389                + nla_total_size(4) /* RTA_OIF */
2390                + nla_total_size(4) /* RTA_PRIORITY */
2391                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2392                + nla_total_size(sizeof(struct rta_cacheinfo));
2393 }
2394
2395 static int rt6_fill_node(struct net *net,
2396                          struct sk_buff *skb, struct rt6_info *rt,
2397                          struct in6_addr *dst, struct in6_addr *src,
2398                          int iif, int type, u32 pid, u32 seq,
2399                          int prefix, int nowait, unsigned int flags)
2400 {
2401         const struct inet_peer *peer;
2402         struct rtmsg *rtm;
2403         struct nlmsghdr *nlh;
2404         long expires;
2405         u32 table;
2406         struct neighbour *n;
2407         u32 ts, tsage;
2408
2409         if (prefix) {   /* user wants prefix routes only */
2410                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2411                         /* success since this is not a prefix route */
2412                         return 1;
2413                 }
2414         }
2415
2416         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2417         if (!nlh)
2418                 return -EMSGSIZE;
2419
2420         rtm = nlmsg_data(nlh);
2421         rtm->rtm_family = AF_INET6;
2422         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2423         rtm->rtm_src_len = rt->rt6i_src.plen;
2424         rtm->rtm_tos = 0;
2425         if (rt->rt6i_table)
2426                 table = rt->rt6i_table->tb6_id;
2427         else
2428                 table = RT6_TABLE_UNSPEC;
2429         rtm->rtm_table = table;
2430         if (nla_put_u32(skb, RTA_TABLE, table))
2431                 goto nla_put_failure;
2432         if (rt->rt6i_flags & RTF_REJECT)
2433                 rtm->rtm_type = RTN_UNREACHABLE;
2434         else if (rt->rt6i_flags & RTF_LOCAL)
2435                 rtm->rtm_type = RTN_LOCAL;
2436         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2437                 rtm->rtm_type = RTN_LOCAL;
2438         else
2439                 rtm->rtm_type = RTN_UNICAST;
2440         rtm->rtm_flags = 0;
2441         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2442         rtm->rtm_protocol = rt->rt6i_protocol;
2443         if (rt->rt6i_flags & RTF_DYNAMIC)
2444                 rtm->rtm_protocol = RTPROT_REDIRECT;
2445         else if (rt->rt6i_flags & RTF_ADDRCONF)
2446                 rtm->rtm_protocol = RTPROT_KERNEL;
2447         else if (rt->rt6i_flags & RTF_DEFAULT)
2448                 rtm->rtm_protocol = RTPROT_RA;
2449
2450         if (rt->rt6i_flags & RTF_CACHE)
2451                 rtm->rtm_flags |= RTM_F_CLONED;
2452
2453         if (dst) {
2454                 if (nla_put(skb, RTA_DST, 16, dst))
2455                         goto nla_put_failure;
2456                 rtm->rtm_dst_len = 128;
2457         } else if (rtm->rtm_dst_len)
2458                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2459                         goto nla_put_failure;
2460 #ifdef CONFIG_IPV6_SUBTREES
2461         if (src) {
2462                 if (nla_put(skb, RTA_SRC, 16, src))
2463                         goto nla_put_failure;
2464                 rtm->rtm_src_len = 128;
2465         } else if (rtm->rtm_src_len &&
2466                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2467                 goto nla_put_failure;
2468 #endif
2469         if (iif) {
2470 #ifdef CONFIG_IPV6_MROUTE
2471                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2472                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2473                         if (err <= 0) {
2474                                 if (!nowait) {
2475                                         if (err == 0)
2476                                                 return 0;
2477                                         goto nla_put_failure;
2478                                 } else {
2479                                         if (err == -EMSGSIZE)
2480                                                 goto nla_put_failure;
2481                                 }
2482                         }
2483                 } else
2484 #endif
2485                         if (nla_put_u32(skb, RTA_IIF, iif))
2486                                 goto nla_put_failure;
2487         } else if (dst) {
2488                 struct in6_addr saddr_buf;
2489                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2490                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2491                         goto nla_put_failure;
2492         }
2493
2494         if (rt->rt6i_prefsrc.plen) {
2495                 struct in6_addr saddr_buf;
2496                 saddr_buf = rt->rt6i_prefsrc.addr;
2497                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2498                         goto nla_put_failure;
2499         }
2500
2501         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2502                 goto nla_put_failure;
2503
2504         rcu_read_lock();
2505         n = dst_get_neighbour_noref(&rt->dst);
2506         if (n) {
2507                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2508                         rcu_read_unlock();
2509                         goto nla_put_failure;
2510                 }
2511         }
2512         rcu_read_unlock();
2513
2514         if (rt->dst.dev &&
2515             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2516                 goto nla_put_failure;
2517         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2518                 goto nla_put_failure;
2519         if (!(rt->rt6i_flags & RTF_EXPIRES))
2520                 expires = 0;
2521         else if (rt->dst.expires - jiffies < INT_MAX)
2522                 expires = rt->dst.expires - jiffies;
2523         else
2524                 expires = INT_MAX;
2525
2526         peer = NULL;
2527         if (rt6_has_peer(rt))
2528                 peer = rt6_peer_ptr(rt);
2529         ts = tsage = 0;
2530         if (peer && peer->tcp_ts_stamp) {
2531                 ts = peer->tcp_ts;
2532                 tsage = get_seconds() - peer->tcp_ts_stamp;
2533         }
2534
2535         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2536                                expires, rt->dst.error) < 0)
2537                 goto nla_put_failure;
2538
2539         return nlmsg_end(skb, nlh);
2540
2541 nla_put_failure:
2542         nlmsg_cancel(skb, nlh);
2543         return -EMSGSIZE;
2544 }
2545
2546 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2547 {
2548         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2549         int prefix;
2550
2551         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2552                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2553                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2554         } else
2555                 prefix = 0;
2556
2557         return rt6_fill_node(arg->net,
2558                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2559                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2560                      prefix, 0, NLM_F_MULTI);
2561 }
2562
2563 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2564 {
2565         struct net *net = sock_net(in_skb->sk);
2566         struct nlattr *tb[RTA_MAX+1];
2567         struct rt6_info *rt;
2568         struct sk_buff *skb;
2569         struct rtmsg *rtm;
2570         struct flowi6 fl6;
2571         int err, iif = 0, oif = 0;
2572
2573         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2574         if (err < 0)
2575                 goto errout;
2576
2577         err = -EINVAL;
2578         memset(&fl6, 0, sizeof(fl6));
2579
2580         if (tb[RTA_SRC]) {
2581                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2582                         goto errout;
2583
2584                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2585         }
2586
2587         if (tb[RTA_DST]) {
2588                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2589                         goto errout;
2590
2591                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2592         }
2593
2594         if (tb[RTA_IIF])
2595                 iif = nla_get_u32(tb[RTA_IIF]);
2596
2597         if (tb[RTA_OIF])
2598                 oif = nla_get_u32(tb[RTA_OIF]);
2599
2600         if (iif) {
2601                 struct net_device *dev;
2602                 int flags = 0;
2603
2604                 dev = __dev_get_by_index(net, iif);
2605                 if (!dev) {
2606                         err = -ENODEV;
2607                         goto errout;
2608                 }
2609
2610                 fl6.flowi6_iif = iif;
2611
2612                 if (!ipv6_addr_any(&fl6.saddr))
2613                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2614
2615                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2616                                                                flags);
2617         } else {
2618                 fl6.flowi6_oif = oif;
2619
2620                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2621         }
2622
2623         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2624         if (!skb) {
2625                 dst_release(&rt->dst);
2626                 err = -ENOBUFS;
2627                 goto errout;
2628         }
2629
2630         /* Reserve room for dummy headers, this skb can pass
2631            through good chunk of routing engine.
2632          */
2633         skb_reset_mac_header(skb);
2634         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2635
2636         skb_dst_set(skb, &rt->dst);
2637
2638         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2639                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2640                             nlh->nlmsg_seq, 0, 0, 0);
2641         if (err < 0) {
2642                 kfree_skb(skb);
2643                 goto errout;
2644         }
2645
2646         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2647 errout:
2648         return err;
2649 }
2650
2651 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2652 {
2653         struct sk_buff *skb;
2654         struct net *net = info->nl_net;
2655         u32 seq;
2656         int err;
2657
2658         err = -ENOBUFS;
2659         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2660
2661         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2662         if (!skb)
2663                 goto errout;
2664
2665         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2666                                 event, info->pid, seq, 0, 0, 0);
2667         if (err < 0) {
2668                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2669                 WARN_ON(err == -EMSGSIZE);
2670                 kfree_skb(skb);
2671                 goto errout;
2672         }
2673         rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2674                     info->nlh, gfp_any());
2675         return;
2676 errout:
2677         if (err < 0)
2678                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2679 }
2680
2681 static int ip6_route_dev_notify(struct notifier_block *this,
2682                                 unsigned long event, void *data)
2683 {
2684         struct net_device *dev = (struct net_device *)data;
2685         struct net *net = dev_net(dev);
2686
2687         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2688                 net->ipv6.ip6_null_entry->dst.dev = dev;
2689                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2690 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2691                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2692                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2693                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2694                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2695 #endif
2696         }
2697
2698         return NOTIFY_OK;
2699 }
2700
2701 /*
2702  *      /proc
2703  */
2704
2705 #ifdef CONFIG_PROC_FS
2706
/*
 * NOTE(review): presumably state for a buffer-based /proc reader; nothing
 * in the nearby seq_file based code uses it.  Confirm it still has users
 * before relying on it.
 */
struct rt6_proc_arg {
        char *buffer;
        int offset;
        int length;
        int skip;
        int len;
};
2715
2716 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2717 {
2718         struct seq_file *m = p_arg;
2719         struct neighbour *n;
2720
2721         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2722
2723 #ifdef CONFIG_IPV6_SUBTREES
2724         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2725 #else
2726         seq_puts(m, "00000000000000000000000000000000 00 ");
2727 #endif
2728         rcu_read_lock();
2729         n = dst_get_neighbour_noref(&rt->dst);
2730         if (n) {
2731                 seq_printf(m, "%pi6", n->primary_key);
2732         } else {
2733                 seq_puts(m, "00000000000000000000000000000000");
2734         }
2735         rcu_read_unlock();
2736         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2737                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2738                    rt->dst.__use, rt->rt6i_flags,
2739                    rt->dst.dev ? rt->dst.dev->name : "");
2740         return 0;
2741 }
2742
2743 static int ipv6_route_show(struct seq_file *m, void *v)
2744 {
2745         struct net *net = (struct net *)m->private;
2746         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2747         return 0;
2748 }
2749
2750 static int ipv6_route_open(struct inode *inode, struct file *file)
2751 {
2752         return single_open_net(inode, file, ipv6_route_show);
2753 }
2754
2755 static const struct file_operations ipv6_route_proc_fops = {
2756         .owner          = THIS_MODULE,
2757         .open           = ipv6_route_open,
2758         .read           = seq_read,
2759         .llseek         = seq_lseek,
2760         .release        = single_release_net,
2761 };
2762
2763 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2764 {
2765         struct net *net = (struct net *)seq->private;
2766         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2767                    net->ipv6.rt6_stats->fib_nodes,
2768                    net->ipv6.rt6_stats->fib_route_nodes,
2769                    net->ipv6.rt6_stats->fib_rt_alloc,
2770                    net->ipv6.rt6_stats->fib_rt_entries,
2771                    net->ipv6.rt6_stats->fib_rt_cache,
2772                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2773                    net->ipv6.rt6_stats->fib_discarded_routes);
2774
2775         return 0;
2776 }
2777
2778 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2779 {
2780         return single_open_net(inode, file, rt6_stats_seq_show);
2781 }
2782
2783 static const struct file_operations rt6_stats_seq_fops = {
2784         .owner   = THIS_MODULE,
2785         .open    = rt6_stats_seq_open,
2786         .read    = seq_read,
2787         .llseek  = seq_lseek,
2788         .release = single_release_net,
2789 };
2790 #endif  /* CONFIG_PROC_FS */
2791
2792 #ifdef CONFIG_SYSCTL
2793
2794 static
2795 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2796                               void __user *buffer, size_t *lenp, loff_t *ppos)
2797 {
2798         struct net *net;
2799         int delay;
2800         if (!write)
2801                 return -EINVAL;
2802
2803         net = (struct net *)ctl->extra1;
2804         delay = net->ipv6.sysctl.flush_delay;
2805         proc_dointvec(ctl, write, buffer, lenp, ppos);
2806         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2807         return 0;
2808 }
2809
2810 ctl_table ipv6_route_table_template[] = {
2811         {
2812                 .procname       =       "flush",
2813                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2814                 .maxlen         =       sizeof(int),
2815                 .mode           =       0200,
2816                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2817         },
2818         {
2819                 .procname       =       "gc_thresh",
2820                 .data           =       &ip6_dst_ops_template.gc_thresh,
2821                 .maxlen         =       sizeof(int),
2822                 .mode           =       0644,
2823                 .proc_handler   =       proc_dointvec,
2824         },
2825         {
2826                 .procname       =       "max_size",
2827                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2828                 .maxlen         =       sizeof(int),
2829                 .mode           =       0644,
2830                 .proc_handler   =       proc_dointvec,
2831         },
2832         {
2833                 .procname       =       "gc_min_interval",
2834                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2835                 .maxlen         =       sizeof(int),
2836                 .mode           =       0644,
2837                 .proc_handler   =       proc_dointvec_jiffies,
2838         },
2839         {
2840                 .procname       =       "gc_timeout",
2841                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2842                 .maxlen         =       sizeof(int),
2843                 .mode           =       0644,
2844                 .proc_handler   =       proc_dointvec_jiffies,
2845         },
2846         {
2847                 .procname       =       "gc_interval",
2848                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2849                 .maxlen         =       sizeof(int),
2850                 .mode           =       0644,
2851                 .proc_handler   =       proc_dointvec_jiffies,
2852         },
2853         {
2854                 .procname       =       "gc_elasticity",
2855                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2856                 .maxlen         =       sizeof(int),
2857                 .mode           =       0644,
2858                 .proc_handler   =       proc_dointvec,
2859         },
2860         {
2861                 .procname       =       "mtu_expires",
2862                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2863                 .maxlen         =       sizeof(int),
2864                 .mode           =       0644,
2865                 .proc_handler   =       proc_dointvec_jiffies,
2866         },
2867         {
2868                 .procname       =       "min_adv_mss",
2869                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2870                 .maxlen         =       sizeof(int),
2871                 .mode           =       0644,
2872                 .proc_handler   =       proc_dointvec,
2873         },
2874         {
2875                 .procname       =       "gc_min_interval_ms",
2876                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2877                 .maxlen         =       sizeof(int),
2878                 .mode           =       0644,
2879                 .proc_handler   =       proc_dointvec_ms_jiffies,
2880         },
2881         { }
2882 };
2883
2884 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2885 {
2886         struct ctl_table *table;
2887
2888         table = kmemdup(ipv6_route_table_template,
2889                         sizeof(ipv6_route_table_template),
2890                         GFP_KERNEL);
2891
2892         if (table) {
2893                 table[0].data = &net->ipv6.sysctl.flush_delay;
2894                 table[0].extra1 = net;
2895                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2896                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2897                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2898                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2899                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2900                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2901                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2902                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2903                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2904         }
2905
2906         return table;
2907 }
2908 #endif
2909
2910 static int __net_init ip6_route_net_init(struct net *net)
2911 {
2912         int ret = -ENOMEM;
2913
2914         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2915                sizeof(net->ipv6.ip6_dst_ops));
2916
2917         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2918                 goto out_ip6_dst_ops;
2919
2920         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2921                                            sizeof(*net->ipv6.ip6_null_entry),
2922                                            GFP_KERNEL);
2923         if (!net->ipv6.ip6_null_entry)
2924                 goto out_ip6_dst_entries;
2925         net->ipv6.ip6_null_entry->dst.path =
2926                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2927         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2928         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2929                          ip6_template_metrics, true);
2930
2931 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2932         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2933                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2934                                                GFP_KERNEL);
2935         if (!net->ipv6.ip6_prohibit_entry)
2936                 goto out_ip6_null_entry;
2937         net->ipv6.ip6_prohibit_entry->dst.path =
2938                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2939         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2940         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2941                          ip6_template_metrics, true);
2942
2943         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2944                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2945                                                GFP_KERNEL);
2946         if (!net->ipv6.ip6_blk_hole_entry)
2947                 goto out_ip6_prohibit_entry;
2948         net->ipv6.ip6_blk_hole_entry->dst.path =
2949                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2950         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2951         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2952                          ip6_template_metrics, true);
2953 #endif
2954
2955         net->ipv6.sysctl.flush_delay = 0;
2956         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2957         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2958         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2959         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2960         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2961         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2962         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2963
2964 #ifdef CONFIG_PROC_FS
2965         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2966         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2967 #endif
2968         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2969
2970         ret = 0;
2971 out:
2972         return ret;
2973
2974 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2975 out_ip6_prohibit_entry:
2976         kfree(net->ipv6.ip6_prohibit_entry);
2977 out_ip6_null_entry:
2978         kfree(net->ipv6.ip6_null_entry);
2979 #endif
2980 out_ip6_dst_entries:
2981         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2982 out_ip6_dst_ops:
2983         goto out;
2984 }
2985
2986 static void __net_exit ip6_route_net_exit(struct net *net)
2987 {
2988 #ifdef CONFIG_PROC_FS
2989         proc_net_remove(net, "ipv6_route");
2990         proc_net_remove(net, "rt6_stats");
2991 #endif
2992         kfree(net->ipv6.ip6_null_entry);
2993 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2994         kfree(net->ipv6.ip6_prohibit_entry);
2995         kfree(net->ipv6.ip6_blk_hole_entry);
2996 #endif
2997         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2998 }
2999
3000 static struct pernet_operations ip6_route_net_ops = {
3001         .init = ip6_route_net_init,
3002         .exit = ip6_route_net_exit,
3003 };
3004
3005 static int __net_init ipv6_inetpeer_init(struct net *net)
3006 {
3007         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3008
3009         if (!bp)
3010                 return -ENOMEM;
3011         inet_peer_base_init(bp);
3012         net->ipv6.peers = bp;
3013         return 0;
3014 }
3015
3016 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3017 {
3018         struct inet_peer_base *bp = net->ipv6.peers;
3019
3020         net->ipv6.peers = NULL;
3021         inetpeer_invalidate_tree(bp);
3022         kfree(bp);
3023 }
3024
3025 static struct pernet_operations ipv6_inetpeer_ops = {
3026         .init   =       ipv6_inetpeer_init,
3027         .exit   =       ipv6_inetpeer_exit,
3028 };
3029
3030 static struct notifier_block ip6_route_dev_notifier = {
3031         .notifier_call = ip6_route_dev_notify,
3032         .priority = 0,
3033 };
3034
3035 int __init ip6_route_init(void)
3036 {
3037         int ret;
3038
3039         ret = -ENOMEM;
3040         ip6_dst_ops_template.kmem_cachep =
3041                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3042                                   SLAB_HWCACHE_ALIGN, NULL);
3043         if (!ip6_dst_ops_template.kmem_cachep)
3044                 goto out;
3045
3046         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3047         if (ret)
3048                 goto out_kmem_cache;
3049
3050         ret = register_pernet_subsys(&ip6_route_net_ops);
3051         if (ret)
3052                 goto out_dst_entries;
3053
3054         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3055         if (ret)
3056                 goto out_register_subsys;
3057
3058         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3059
3060         /* Registering of the loopback is done before this portion of code,
3061          * the loopback reference in rt6_info will not be taken, do it
3062          * manually for init_net */
3063         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3064         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3065   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3066         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3067         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3068         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3069         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3070   #endif
3071         ret = fib6_init();
3072         if (ret)
3073                 goto out_register_inetpeer;
3074
3075         ret = xfrm6_init();
3076         if (ret)
3077                 goto out_fib6_init;
3078
3079         ret = fib6_rules_init();
3080         if (ret)
3081                 goto xfrm6_init;
3082
3083         ret = -ENOBUFS;
3084         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3085             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3086             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3087                 goto fib6_rules_init;
3088
3089         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3090         if (ret)
3091                 goto fib6_rules_init;
3092
3093 out:
3094         return ret;
3095
3096 fib6_rules_init:
3097         fib6_rules_cleanup();
3098 xfrm6_init:
3099         xfrm6_fini();
3100 out_fib6_init:
3101         fib6_gc_cleanup();
3102 out_register_inetpeer:
3103         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3104 out_register_subsys:
3105         unregister_pernet_subsys(&ip6_route_net_ops);
3106 out_dst_entries:
3107         dst_entries_destroy(&ip6_dst_blackhole_ops);
3108 out_kmem_cache:
3109         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3110         goto out;
3111 }
3112
/*
 * Undo ip6_route_init(): unregister the netdevice notifier first, then run
 * the same teardown sequence that ip6_route_init() uses on its error path
 * (fib6 rules, xfrm6, fib6 gc, the two pernet subsystems, blackhole dst
 * accounting, and finally the dst kmem cache).  The order matters: each
 * step is released before the things it was set up on top of.
 */
3113 void ip6_route_cleanup(void)
3114 {
3115         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3116         fib6_rules_cleanup();
3117         xfrm6_fini();
3118         fib6_gc_cleanup();
3119         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3120         unregister_pernet_subsys(&ip6_route_net_ops);
3121         dst_entries_destroy(&ip6_dst_blackhole_ops);
3122         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3123 }