net: Enable userns root rtnl calls that are safe for unprivileged users
[linux-3.10.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex,
91                                            unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93                                            const struct in6_addr *prefix, int prefixlen,
94                                            const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155 {
156         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
157         if (!n) {
158                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159                 if (IS_ERR(n))
160                         return PTR_ERR(n);
161         }
162         rt->n = n;
163
164         return 0;
165 }
166
167 static struct dst_ops ip6_dst_ops_template = {
168         .family                 =       AF_INET6,
169         .protocol               =       cpu_to_be16(ETH_P_IPV6),
170         .gc                     =       ip6_dst_gc,
171         .gc_thresh              =       1024,
172         .check                  =       ip6_dst_check,
173         .default_advmss         =       ip6_default_advmss,
174         .mtu                    =       ip6_mtu,
175         .cow_metrics            =       ipv6_cow_metrics,
176         .destroy                =       ip6_dst_destroy,
177         .ifdown                 =       ip6_dst_ifdown,
178         .negative_advice        =       ip6_negative_advice,
179         .link_failure           =       ip6_link_failure,
180         .update_pmtu            =       ip6_rt_update_pmtu,
181         .redirect               =       rt6_do_redirect,
182         .local_out              =       __ip6_local_out,
183         .neigh_lookup           =       ip6_neigh_lookup,
184 };
185
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187 {
188         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190         return mtu ? : dst->dev->mtu;
191 }
192
193 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194                                          struct sk_buff *skb, u32 mtu)
195 {
196 }
197
/* .redirect callback of the blackhole dst_ops: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
202
203 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204                                          unsigned long old)
205 {
206         return NULL;
207 }
208
209 static struct dst_ops ip6_dst_blackhole_ops = {
210         .family                 =       AF_INET6,
211         .protocol               =       cpu_to_be16(ETH_P_IPV6),
212         .destroy                =       ip6_dst_destroy,
213         .check                  =       ip6_dst_check,
214         .mtu                    =       ip6_blackhole_mtu,
215         .default_advmss         =       ip6_default_advmss,
216         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
217         .redirect               =       ip6_rt_blackhole_redirect,
218         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
219         .neigh_lookup           =       ip6_neigh_lookup,
220 };
221
222 static const u32 ip6_template_metrics[RTAX_MAX] = {
223         [RTAX_HOPLIMIT - 1] = 0,
224 };
225
226 static const struct rt6_info ip6_null_entry_template = {
227         .dst = {
228                 .__refcnt       = ATOMIC_INIT(1),
229                 .__use          = 1,
230                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
231                 .error          = -ENETUNREACH,
232                 .input          = ip6_pkt_discard,
233                 .output         = ip6_pkt_discard_out,
234         },
235         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
236         .rt6i_protocol  = RTPROT_KERNEL,
237         .rt6i_metric    = ~(u32) 0,
238         .rt6i_ref       = ATOMIC_INIT(1),
239 };
240
241 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
242
243 static int ip6_pkt_prohibit(struct sk_buff *skb);
244 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
245
246 static const struct rt6_info ip6_prohibit_entry_template = {
247         .dst = {
248                 .__refcnt       = ATOMIC_INIT(1),
249                 .__use          = 1,
250                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
251                 .error          = -EACCES,
252                 .input          = ip6_pkt_prohibit,
253                 .output         = ip6_pkt_prohibit_out,
254         },
255         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
256         .rt6i_protocol  = RTPROT_KERNEL,
257         .rt6i_metric    = ~(u32) 0,
258         .rt6i_ref       = ATOMIC_INIT(1),
259 };
260
261 static const struct rt6_info ip6_blk_hole_entry_template = {
262         .dst = {
263                 .__refcnt       = ATOMIC_INIT(1),
264                 .__use          = 1,
265                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
266                 .error          = -EINVAL,
267                 .input          = dst_discard,
268                 .output         = dst_discard,
269         },
270         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
271         .rt6i_protocol  = RTPROT_KERNEL,
272         .rt6i_metric    = ~(u32) 0,
273         .rt6i_ref       = ATOMIC_INIT(1),
274 };
275
276 #endif
277
278 /* allocate dst with ip6_dst_ops */
279 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280                                              struct net_device *dev,
281                                              int flags,
282                                              struct fib6_table *table)
283 {
284         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285                                         0, DST_OBSOLETE_FORCE_CHK, flags);
286
287         if (rt) {
288                 struct dst_entry *dst = &rt->dst;
289
290                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292                 rt->rt6i_genid = rt_genid(net);
293                 INIT_LIST_HEAD(&rt->rt6i_siblings);
294                 rt->rt6i_nsiblings = 0;
295         }
296         return rt;
297 }
298
299 static void ip6_dst_destroy(struct dst_entry *dst)
300 {
301         struct rt6_info *rt = (struct rt6_info *)dst;
302         struct inet6_dev *idev = rt->rt6i_idev;
303
304         if (rt->n)
305                 neigh_release(rt->n);
306
307         if (!(rt->dst.flags & DST_HOST))
308                 dst_destroy_metrics_generic(dst);
309
310         if (idev) {
311                 rt->rt6i_idev = NULL;
312                 in6_dev_put(idev);
313         }
314
315         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316                 dst_release(dst->from);
317
318         if (rt6_has_peer(rt)) {
319                 struct inet_peer *peer = rt6_peer_ptr(rt);
320                 inet_putpeer(peer);
321         }
322 }
323
324 void rt6_bind_peer(struct rt6_info *rt, int create)
325 {
326         struct inet_peer_base *base;
327         struct inet_peer *peer;
328
329         base = inetpeer_base_ptr(rt->_rt6i_peer);
330         if (!base)
331                 return;
332
333         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
334         if (peer) {
335                 if (!rt6_set_peer(rt, peer))
336                         inet_putpeer(peer);
337         }
338 }
339
340 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
341                            int how)
342 {
343         struct rt6_info *rt = (struct rt6_info *)dst;
344         struct inet6_dev *idev = rt->rt6i_idev;
345         struct net_device *loopback_dev =
346                 dev_net(dev)->loopback_dev;
347
348         if (dev != loopback_dev) {
349                 if (idev && idev->dev == dev) {
350                         struct inet6_dev *loopback_idev =
351                                 in6_dev_get(loopback_dev);
352                         if (loopback_idev) {
353                                 rt->rt6i_idev = loopback_idev;
354                                 in6_dev_put(idev);
355                         }
356                 }
357                 if (rt->n && rt->n->dev == dev) {
358                         rt->n->dev = loopback_dev;
359                         dev_hold(loopback_dev);
360                         dev_put(dev);
361                 }
362         }
363 }
364
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367         if (rt->rt6i_flags & RTF_EXPIRES) {
368                 if (time_after(jiffies, rt->dst.expires))
369                         return true;
370         } else if (rt->dst.from) {
371                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372         }
373         return false;
374 }
375
376 static bool rt6_need_strict(const struct in6_addr *daddr)
377 {
378         return ipv6_addr_type(daddr) &
379                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
380 }
381
382 /* Multipath route selection:
383  *   Hash based function using packet header and flowlabel.
384  * Adapted from fib_info_hashfn()
385  */
386 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387                                const struct flowi6 *fl6)
388 {
389         unsigned int val = fl6->flowi6_proto;
390
391         val ^= (__force u32)fl6->daddr.s6_addr32[0];
392         val ^= (__force u32)fl6->daddr.s6_addr32[1];
393         val ^= (__force u32)fl6->daddr.s6_addr32[2];
394         val ^= (__force u32)fl6->daddr.s6_addr32[3];
395
396         val ^= (__force u32)fl6->saddr.s6_addr32[0];
397         val ^= (__force u32)fl6->saddr.s6_addr32[1];
398         val ^= (__force u32)fl6->saddr.s6_addr32[2];
399         val ^= (__force u32)fl6->saddr.s6_addr32[3];
400
401         /* Work only if this not encapsulated */
402         switch (fl6->flowi6_proto) {
403         case IPPROTO_UDP:
404         case IPPROTO_TCP:
405         case IPPROTO_SCTP:
406                 val ^= (__force u16)fl6->fl6_sport;
407                 val ^= (__force u16)fl6->fl6_dport;
408                 break;
409
410         case IPPROTO_ICMPV6:
411                 val ^= (__force u16)fl6->fl6_icmp_type;
412                 val ^= (__force u16)fl6->fl6_icmp_code;
413                 break;
414         }
415         /* RFC6438 recommands to use flowlabel */
416         val ^= (__force u32)fl6->flowlabel;
417
418         /* Perhaps, we need to tune, this function? */
419         val = val ^ (val >> 7) ^ (val >> 12);
420         return val % candidate_count;
421 }
422
423 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
424                                              struct flowi6 *fl6)
425 {
426         struct rt6_info *sibling, *next_sibling;
427         int route_choosen;
428
429         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
430         /* Don't change the route, if route_choosen == 0
431          * (siblings does not include ourself)
432          */
433         if (route_choosen)
434                 list_for_each_entry_safe(sibling, next_sibling,
435                                 &match->rt6i_siblings, rt6i_siblings) {
436                         route_choosen--;
437                         if (route_choosen == 0) {
438                                 match = sibling;
439                                 break;
440                         }
441                 }
442         return match;
443 }
444
445 /*
446  *      Route lookup. Any table->tb6_lock is implied.
447  */
448
449 static inline struct rt6_info *rt6_device_match(struct net *net,
450                                                     struct rt6_info *rt,
451                                                     const struct in6_addr *saddr,
452                                                     int oif,
453                                                     int flags)
454 {
455         struct rt6_info *local = NULL;
456         struct rt6_info *sprt;
457
458         if (!oif && ipv6_addr_any(saddr))
459                 goto out;
460
461         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
462                 struct net_device *dev = sprt->dst.dev;
463
464                 if (oif) {
465                         if (dev->ifindex == oif)
466                                 return sprt;
467                         if (dev->flags & IFF_LOOPBACK) {
468                                 if (!sprt->rt6i_idev ||
469                                     sprt->rt6i_idev->dev->ifindex != oif) {
470                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
471                                                 continue;
472                                         if (local && (!oif ||
473                                                       local->rt6i_idev->dev->ifindex == oif))
474                                                 continue;
475                                 }
476                                 local = sprt;
477                         }
478                 } else {
479                         if (ipv6_chk_addr(net, saddr, dev,
480                                           flags & RT6_LOOKUP_F_IFACE))
481                                 return sprt;
482                 }
483         }
484
485         if (oif) {
486                 if (local)
487                         return local;
488
489                 if (flags & RT6_LOOKUP_F_IFACE)
490                         return net->ipv6.ip6_null_entry;
491         }
492 out:
493         return rt;
494 }
495
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing for the neighbour bound to @rt.
 *
 * Does nothing when @rt is NULL, has no bound neighbour, or the neighbour
 * is already in a NUD_VALID state. Otherwise, if more than
 * rtr_probe_interval jiffies have passed since neigh->updated, the
 * timestamp is refreshed and a Neighbour Solicitation for the neighbour's
 * address is sent to its solicited-node multicast address on the route's
 * device.
 *
 * neigh->updated is modified here, so the neighbour lock is taken for
 * writing. With only the read lock several CPUs could pass the time check
 * together and each send a probe, defeating the rate limit.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	neigh = rt ? rt->n : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	/* Re-check under the lock: the state may have changed meanwhile. */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Send the solicitation without holding the neighbour lock. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
532
533 /*
534  * Default Router Selection (RFC 2461 6.3.6)
535  */
536 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
537 {
538         struct net_device *dev = rt->dst.dev;
539         if (!oif || dev->ifindex == oif)
540                 return 2;
541         if ((dev->flags & IFF_LOOPBACK) &&
542             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
543                 return 1;
544         return 0;
545 }
546
547 static inline int rt6_check_neigh(struct rt6_info *rt)
548 {
549         struct neighbour *neigh;
550         int m;
551
552         neigh = rt->n;
553         if (rt->rt6i_flags & RTF_NONEXTHOP ||
554             !(rt->rt6i_flags & RTF_GATEWAY))
555                 m = 1;
556         else if (neigh) {
557                 read_lock_bh(&neigh->lock);
558                 if (neigh->nud_state & NUD_VALID)
559                         m = 2;
560 #ifdef CONFIG_IPV6_ROUTER_PREF
561                 else if (neigh->nud_state & NUD_FAILED)
562                         m = 0;
563 #endif
564                 else
565                         m = 1;
566                 read_unlock_bh(&neigh->lock);
567         } else
568                 m = 0;
569         return m;
570 }
571
572 static int rt6_score_route(struct rt6_info *rt, int oif,
573                            int strict)
574 {
575         int m, n;
576
577         m = rt6_check_dev(rt, oif);
578         if (!m && (strict & RT6_LOOKUP_F_IFACE))
579                 return -1;
580 #ifdef CONFIG_IPV6_ROUTER_PREF
581         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
582 #endif
583         n = rt6_check_neigh(rt);
584         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
585                 return -1;
586         return m;
587 }
588
589 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
590                                    int *mpri, struct rt6_info *match)
591 {
592         int m;
593
594         if (rt6_check_expired(rt))
595                 goto out;
596
597         m = rt6_score_route(rt, oif, strict);
598         if (m < 0)
599                 goto out;
600
601         if (m > *mpri) {
602                 if (strict & RT6_LOOKUP_F_REACHABLE)
603                         rt6_probe(match);
604                 *mpri = m;
605                 match = rt;
606         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
607                 rt6_probe(rt);
608         }
609
610 out:
611         return match;
612 }
613
614 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
615                                      struct rt6_info *rr_head,
616                                      u32 metric, int oif, int strict)
617 {
618         struct rt6_info *rt, *match;
619         int mpri = -1;
620
621         match = NULL;
622         for (rt = rr_head; rt && rt->rt6i_metric == metric;
623              rt = rt->dst.rt6_next)
624                 match = find_match(rt, oif, strict, &mpri, match);
625         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
626              rt = rt->dst.rt6_next)
627                 match = find_match(rt, oif, strict, &mpri, match);
628
629         return match;
630 }
631
632 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
633 {
634         struct rt6_info *match, *rt0;
635         struct net *net;
636
637         rt0 = fn->rr_ptr;
638         if (!rt0)
639                 fn->rr_ptr = rt0 = fn->leaf;
640
641         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
642
643         if (!match &&
644             (strict & RT6_LOOKUP_F_REACHABLE)) {
645                 struct rt6_info *next = rt0->dst.rt6_next;
646
647                 /* no entries matched; do round-robin */
648                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
649                         next = fn->leaf;
650
651                 if (next != rt0)
652                         fn->rr_ptr = next;
653         }
654
655         net = dev_net(rt0->dst.dev);
656         return match ? match : net->ipv6.ip6_null_entry;
657 }
658
659 #ifdef CONFIG_IPV6_ROUTE_INFO
660 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
661                   const struct in6_addr *gwaddr)
662 {
663         struct net *net = dev_net(dev);
664         struct route_info *rinfo = (struct route_info *) opt;
665         struct in6_addr prefix_buf, *prefix;
666         unsigned int pref;
667         unsigned long lifetime;
668         struct rt6_info *rt;
669
670         if (len < sizeof(struct route_info)) {
671                 return -EINVAL;
672         }
673
674         /* Sanity check for prefix_len and length */
675         if (rinfo->length > 3) {
676                 return -EINVAL;
677         } else if (rinfo->prefix_len > 128) {
678                 return -EINVAL;
679         } else if (rinfo->prefix_len > 64) {
680                 if (rinfo->length < 2) {
681                         return -EINVAL;
682                 }
683         } else if (rinfo->prefix_len > 0) {
684                 if (rinfo->length < 1) {
685                         return -EINVAL;
686                 }
687         }
688
689         pref = rinfo->route_pref;
690         if (pref == ICMPV6_ROUTER_PREF_INVALID)
691                 return -EINVAL;
692
693         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
694
695         if (rinfo->length == 3)
696                 prefix = (struct in6_addr *)rinfo->prefix;
697         else {
698                 /* this function is safe */
699                 ipv6_addr_prefix(&prefix_buf,
700                                  (struct in6_addr *)rinfo->prefix,
701                                  rinfo->prefix_len);
702                 prefix = &prefix_buf;
703         }
704
705         rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
706                                 dev->ifindex);
707
708         if (rt && !lifetime) {
709                 ip6_del_rt(rt);
710                 rt = NULL;
711         }
712
713         if (!rt && lifetime)
714                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
715                                         pref);
716         else if (rt)
717                 rt->rt6i_flags = RTF_ROUTEINFO |
718                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
719
720         if (rt) {
721                 if (!addrconf_finite_timeout(lifetime))
722                         rt6_clean_expires(rt);
723                 else
724                         rt6_set_expires(rt, jiffies + HZ * lifetime);
725
726                 ip6_rt_put(rt);
727         }
728         return 0;
729 }
730 #endif
731
/*
 * BACKTRACK - climb the fib6 tree after a lookup ended on the null entry.
 *
 * Expands inside a lookup function and relies on that function's locals
 * `rt' and `fn' and on its labels `out' and `restart'. Does nothing unless
 * rt is __net's ip6_null_entry. Otherwise it repeatedly: jumps to `out' if
 * the current node is flagged RTN_TL_ROOT; moves to the parent node, or -
 * when the parent has a subtree other than the current node - to the
 * result of fib6_lookup() on that subtree for saddr; and jumps to
 * `restart' as soon as the new node is flagged RTN_RTINFO.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			fn = (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) ? \
				fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr) : \
				pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
749
750 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
751                                              struct fib6_table *table,
752                                              struct flowi6 *fl6, int flags)
753 {
754         struct fib6_node *fn;
755         struct rt6_info *rt;
756
757         read_lock_bh(&table->tb6_lock);
758         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
759 restart:
760         rt = fn->leaf;
761         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
762         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
763                 rt = rt6_multipath_select(rt, fl6);
764         BACKTRACK(net, &fl6->saddr);
765 out:
766         dst_use(&rt->dst, jiffies);
767         read_unlock_bh(&table->tb6_lock);
768         return rt;
769
770 }
771
772 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
773                                     int flags)
774 {
775         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
776 }
777 EXPORT_SYMBOL_GPL(ip6_route_lookup);
778
779 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
780                             const struct in6_addr *saddr, int oif, int strict)
781 {
782         struct flowi6 fl6 = {
783                 .flowi6_oif = oif,
784                 .daddr = *daddr,
785         };
786         struct dst_entry *dst;
787         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
788
789         if (saddr) {
790                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
791                 flags |= RT6_LOOKUP_F_HAS_SADDR;
792         }
793
794         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
795         if (dst->error == 0)
796                 return (struct rt6_info *) dst;
797
798         dst_release(dst);
799
800         return NULL;
801 }
802
803 EXPORT_SYMBOL(rt6_lookup);
804
805 /* ip6_ins_rt is called with FREE table->tb6_lock.
806    It takes new route entry, the addition fails by any reason the
807    route is freed. In any case, if caller does not hold it, it may
808    be destroyed.
809  */
810
811 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
812 {
813         int err;
814         struct fib6_table *table;
815
816         table = rt->rt6i_table;
817         write_lock_bh(&table->tb6_lock);
818         err = fib6_add(&table->tb6_root, rt, info);
819         write_unlock_bh(&table->tb6_lock);
820
821         return err;
822 }
823
824 int ip6_ins_rt(struct rt6_info *rt)
825 {
826         struct nl_info info = {
827                 .nl_net = dev_net(rt->dst.dev),
828         };
829         return __ip6_ins_rt(rt, &info);
830 }
831
832 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
833                                       const struct in6_addr *daddr,
834                                       const struct in6_addr *saddr)
835 {
836         struct rt6_info *rt;
837
838         /*
839          *      Clone the route.
840          */
841
842         rt = ip6_rt_copy(ort, daddr);
843
844         if (rt) {
845                 int attempts = !in_softirq();
846
847                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
848                         if (ort->rt6i_dst.plen != 128 &&
849                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
850                                 rt->rt6i_flags |= RTF_ANYCAST;
851                         rt->rt6i_gateway = *daddr;
852                 }
853
854                 rt->rt6i_flags |= RTF_CACHE;
855
856 #ifdef CONFIG_IPV6_SUBTREES
857                 if (rt->rt6i_src.plen && saddr) {
858                         rt->rt6i_src.addr = *saddr;
859                         rt->rt6i_src.plen = 128;
860                 }
861 #endif
862
863         retry:
864                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
865                         struct net *net = dev_net(rt->dst.dev);
866                         int saved_rt_min_interval =
867                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
868                         int saved_rt_elasticity =
869                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
870
871                         if (attempts-- > 0) {
872                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
873                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
874
875                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
876
877                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
878                                         saved_rt_elasticity;
879                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
880                                         saved_rt_min_interval;
881                                 goto retry;
882                         }
883
884                         net_warn_ratelimited("Neighbour table overflow\n");
885                         dst_free(&rt->dst);
886                         return NULL;
887                 }
888         }
889
890         return rt;
891 }
892
893 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
894                                         const struct in6_addr *daddr)
895 {
896         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
897
898         if (rt) {
899                 rt->rt6i_flags |= RTF_CACHE;
900                 rt->n = neigh_clone(ort->n);
901         }
902         return rt;
903 }
904
/*
 * ip6_pol_route - look up the route for @fl6 in @table.
 *
 * @net:   namespace supplying ip6_null_entry and the forwarding setting
 * @table: FIB table to search; its tb6_lock is read-locked during lookup
 * @oif:   interface index handed to rt6_select()
 * @fl6:   flow whose daddr/saddr are looked up
 * @flags: only RT6_LOOKUP_F_IFACE is consumed here (folded into `strict`)
 *
 * Always returns a route with a reference taken via dst_hold() and with
 * dst.lastuse / dst.__use updated.  When the selected route is neither
 * ip6_null_entry nor already RTF_CACHE, a per-destination copy is made
 * (rt6_alloc_cow() or rt6_alloc_clone()) and inserted with ip6_ins_rt();
 * a failed insert triggers a fresh lookup, bounded by `attempts`.
 *
 * NOTE(review): BACKTRACK() is a macro defined outside this block; the
 * `restart` label has no goto in the visible code, so the macro presumably
 * jumps to it (and possibly to `out`) and uses the locals `rt`/`fn` --
 * confirm against its definition before renaming anything here.
 */
905 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
906                                       struct flowi6 *fl6, int flags)
907 {
908         struct fib6_node *fn;
909         struct rt6_info *rt, *nrt;
910         int strict = 0;
             /* Number of lookup/insert rounds before giving up. */
911         int attempts = 3;
912         int err;
             /* Ask for reachable routers first unless forwarding is enabled. */
913         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
914
915         strict |= flags & RT6_LOOKUP_F_IFACE;
916
917 relookup:
918         read_lock_bh(&table->tb6_lock);
919
920 restart_2:
921         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
922
923 restart:
924         rt = rt6_select(fn, oif, strict | reachable);
             /* Multipath selection is only applied when no oif was given. */
925         if (rt->rt6i_nsiblings && oif == 0)
926                 rt = rt6_multipath_select(rt, fl6);
927         BACKTRACK(net, &fl6->saddr);
             /* Null entry or an existing cache entry: nothing to copy. */
928         if (rt == net->ipv6.ip6_null_entry ||
929             rt->rt6i_flags & RTF_CACHE)
930                 goto out;
931
             /* Pin rt, then drop the table lock before allocating a copy. */
932         dst_hold(&rt->dst);
933         read_unlock_bh(&table->tb6_lock);
934
935         if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
936                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
937         else if (!(rt->dst.flags & DST_HOST))
938                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
939         else
                     /* Host route with a neighbour (or NONEXTHOP): use as is. */
940                 goto out2;
941
             /* Swap the pinned original for the copy (or the null entry). */
942         ip6_rt_put(rt);
943         rt = nrt ? : net->ipv6.ip6_null_entry;
944
945         dst_hold(&rt->dst);
946         if (nrt) {
947                 err = ip6_ins_rt(nrt);
948                 if (!err)
949                         goto out2;
950         }
951
             /* Out of attempts: return whatever rt currently is. */
952         if (--attempts <= 0)
953                 goto out2;
954
955         /*
956          * Race condition! In the gap, when table->tb6_lock was
957          * released someone could insert this route.  Relookup.
958          */
959         ip6_rt_put(rt);
960         goto relookup;
961
962 out:
             /*
              * Reached with tb6_lock still read-held.  If the lookup was
              * restricted to reachable routers, redo it once without that
              * restriction before settling on rt.
              */
963         if (reachable) {
964                 reachable = 0;
965                 goto restart_2;
966         }
967         dst_hold(&rt->dst);
968         read_unlock_bh(&table->tb6_lock);
969 out2:
970         rt->dst.lastuse = jiffies;
971         rt->dst.__use++;
972
973         return rt;
974 }
975
976 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
977                                             struct flowi6 *fl6, int flags)
978 {
979         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
980 }
981
982 static struct dst_entry *ip6_route_input_lookup(struct net *net,
983                                                 struct net_device *dev,
984                                                 struct flowi6 *fl6, int flags)
985 {
986         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
987                 flags |= RT6_LOOKUP_F_IFACE;
988
989         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
990 }
991
992 void ip6_route_input(struct sk_buff *skb)
993 {
994         const struct ipv6hdr *iph = ipv6_hdr(skb);
995         struct net *net = dev_net(skb->dev);
996         int flags = RT6_LOOKUP_F_HAS_SADDR;
997         struct flowi6 fl6 = {
998                 .flowi6_iif = skb->dev->ifindex,
999                 .daddr = iph->daddr,
1000                 .saddr = iph->saddr,
1001                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
1002                 .flowi6_mark = skb->mark,
1003                 .flowi6_proto = iph->nexthdr,
1004         };
1005
1006         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1007 }
1008
1009 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1010                                              struct flowi6 *fl6, int flags)
1011 {
1012         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1013 }
1014
1015 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1016                                     struct flowi6 *fl6)
1017 {
1018         int flags = 0;
1019
1020         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1021
1022         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1023                 flags |= RT6_LOOKUP_F_IFACE;
1024
1025         if (!ipv6_addr_any(&fl6->saddr))
1026                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1027         else if (sk)
1028                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1029
1030         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1031 }
1032
1033 EXPORT_SYMBOL(ip6_route_output);
1034
1035 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1036 {
1037         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1038         struct dst_entry *new = NULL;
1039
1040         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1041         if (rt) {
1042                 new = &rt->dst;
1043
1044                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1045                 rt6_init_peer(rt, net->ipv6.peers);
1046
1047                 new->__use = 1;
1048                 new->input = dst_discard;
1049                 new->output = dst_discard;
1050
1051                 if (dst_metrics_read_only(&ort->dst))
1052                         new->_metrics = ort->dst._metrics;
1053                 else
1054                         dst_copy_metrics(new, &ort->dst);
1055                 rt->rt6i_idev = ort->rt6i_idev;
1056                 if (rt->rt6i_idev)
1057                         in6_dev_hold(rt->rt6i_idev);
1058
1059                 rt->rt6i_gateway = ort->rt6i_gateway;
1060                 rt->rt6i_flags = ort->rt6i_flags;
1061                 rt6_clean_expires(rt);
1062                 rt->rt6i_metric = 0;
1063
1064                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1065 #ifdef CONFIG_IPV6_SUBTREES
1066                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1067 #endif
1068
1069                 dst_free(new);
1070         }
1071
1072         dst_release(dst_orig);
1073         return new ? new : ERR_PTR(-ENOMEM);
1074 }
1075
1076 /*
1077  *      Destination cache support functions
1078  */
1079
1080 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1081 {
1082         struct rt6_info *rt;
1083
1084         rt = (struct rt6_info *) dst;
1085
1086         /* All IPV6 dsts are created with ->obsolete set to the value
1087          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1088          * into this function always.
1089          */
1090         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1091                 return NULL;
1092
1093         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1094                 return dst;
1095
1096         return NULL;
1097 }
1098
1099 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1100 {
1101         struct rt6_info *rt = (struct rt6_info *) dst;
1102
1103         if (rt) {
1104                 if (rt->rt6i_flags & RTF_CACHE) {
1105                         if (rt6_check_expired(rt)) {
1106                                 ip6_del_rt(rt);
1107                                 dst = NULL;
1108                         }
1109                 } else {
1110                         dst_release(dst);
1111                         dst = NULL;
1112                 }
1113         }
1114         return dst;
1115 }
1116
1117 static void ip6_link_failure(struct sk_buff *skb)
1118 {
1119         struct rt6_info *rt;
1120
1121         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1122
1123         rt = (struct rt6_info *) skb_dst(skb);
1124         if (rt) {
1125                 if (rt->rt6i_flags & RTF_CACHE)
1126                         rt6_update_expires(rt, 0);
1127                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1128                         rt->rt6i_node->fn_sernum = -1;
1129         }
1130 }
1131
1132 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1133                                struct sk_buff *skb, u32 mtu)
1134 {
1135         struct rt6_info *rt6 = (struct rt6_info*)dst;
1136
1137         dst_confirm(dst);
1138         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1139                 struct net *net = dev_net(dst->dev);
1140
1141                 rt6->rt6i_flags |= RTF_MODIFIED;
1142                 if (mtu < IPV6_MIN_MTU) {
1143                         u32 features = dst_metric(dst, RTAX_FEATURES);
1144                         mtu = IPV6_MIN_MTU;
1145                         features |= RTAX_FEATURE_ALLFRAG;
1146                         dst_metric_set(dst, RTAX_FEATURES, features);
1147                 }
1148                 dst_metric_set(dst, RTAX_MTU, mtu);
1149                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1150         }
1151 }
1152
1153 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1154                      int oif, u32 mark)
1155 {
1156         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1157         struct dst_entry *dst;
1158         struct flowi6 fl6;
1159
1160         memset(&fl6, 0, sizeof(fl6));
1161         fl6.flowi6_oif = oif;
1162         fl6.flowi6_mark = mark;
1163         fl6.flowi6_flags = 0;
1164         fl6.daddr = iph->daddr;
1165         fl6.saddr = iph->saddr;
1166         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1167
1168         dst = ip6_route_output(net, NULL, &fl6);
1169         if (!dst->error)
1170                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1171         dst_release(dst);
1172 }
1173 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1174
1175 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1176 {
1177         ip6_update_pmtu(skb, sock_net(sk), mtu,
1178                         sk->sk_bound_dev_if, sk->sk_mark);
1179 }
1180 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1181
1182 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1183 {
1184         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1185         struct dst_entry *dst;
1186         struct flowi6 fl6;
1187
1188         memset(&fl6, 0, sizeof(fl6));
1189         fl6.flowi6_oif = oif;
1190         fl6.flowi6_mark = mark;
1191         fl6.flowi6_flags = 0;
1192         fl6.daddr = iph->daddr;
1193         fl6.saddr = iph->saddr;
1194         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1195
1196         dst = ip6_route_output(net, NULL, &fl6);
1197         if (!dst->error)
1198                 rt6_do_redirect(dst, NULL, skb);
1199         dst_release(dst);
1200 }
1201 EXPORT_SYMBOL_GPL(ip6_redirect);
1202
1203 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1204 {
1205         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1206 }
1207 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1208
1209 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1210 {
1211         struct net_device *dev = dst->dev;
1212         unsigned int mtu = dst_mtu(dst);
1213         struct net *net = dev_net(dev);
1214
1215         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1216
1217         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1218                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1219
1220         /*
1221          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1222          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1223          * IPV6_MAXPLEN is also valid and means: "any MSS,
1224          * rely only on pmtu discovery"
1225          */
1226         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1227                 mtu = IPV6_MAXPLEN;
1228         return mtu;
1229 }
1230
1231 static unsigned int ip6_mtu(const struct dst_entry *dst)
1232 {
1233         struct inet6_dev *idev;
1234         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1235
1236         if (mtu)
1237                 return mtu;
1238
1239         mtu = IPV6_MIN_MTU;
1240
1241         rcu_read_lock();
1242         idev = __in6_dev_get(dst->dev);
1243         if (idev)
1244                 mtu = idev->cnf.mtu6;
1245         rcu_read_unlock();
1246
1247         return mtu;
1248 }
1249
/*
 * Head of a singly linked list, chained through dst_entry::next, of the
 * dst entries created by icmp6_dst_alloc().  Entries are unlinked and
 * freed by icmp6_dst_gc() and icmp6_clean_all().
 */
1250 static struct dst_entry *icmp6_dst_gc_list;
/* Held (with the _bh variants) around every walk or update of the list. */
1251 static DEFINE_SPINLOCK(icmp6_dst_lock);
1252
1253 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1254                                   struct neighbour *neigh,
1255                                   struct flowi6 *fl6)
1256 {
1257         struct dst_entry *dst;
1258         struct rt6_info *rt;
1259         struct inet6_dev *idev = in6_dev_get(dev);
1260         struct net *net = dev_net(dev);
1261
1262         if (unlikely(!idev))
1263                 return ERR_PTR(-ENODEV);
1264
1265         rt = ip6_dst_alloc(net, dev, 0, NULL);
1266         if (unlikely(!rt)) {
1267                 in6_dev_put(idev);
1268                 dst = ERR_PTR(-ENOMEM);
1269                 goto out;
1270         }
1271
1272         if (neigh)
1273                 neigh_hold(neigh);
1274         else {
1275                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1276                 if (IS_ERR(neigh)) {
1277                         in6_dev_put(idev);
1278                         dst_free(&rt->dst);
1279                         return ERR_CAST(neigh);
1280                 }
1281         }
1282
1283         rt->dst.flags |= DST_HOST;
1284         rt->dst.output  = ip6_output;
1285         rt->n = neigh;
1286         atomic_set(&rt->dst.__refcnt, 1);
1287         rt->rt6i_dst.addr = fl6->daddr;
1288         rt->rt6i_dst.plen = 128;
1289         rt->rt6i_idev     = idev;
1290         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1291
1292         spin_lock_bh(&icmp6_dst_lock);
1293         rt->dst.next = icmp6_dst_gc_list;
1294         icmp6_dst_gc_list = &rt->dst;
1295         spin_unlock_bh(&icmp6_dst_lock);
1296
1297         fib6_force_start_gc(net);
1298
1299         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1300
1301 out:
1302         return dst;
1303 }
1304
1305 int icmp6_dst_gc(void)
1306 {
1307         struct dst_entry *dst, **pprev;
1308         int more = 0;
1309
1310         spin_lock_bh(&icmp6_dst_lock);
1311         pprev = &icmp6_dst_gc_list;
1312
1313         while ((dst = *pprev) != NULL) {
1314                 if (!atomic_read(&dst->__refcnt)) {
1315                         *pprev = dst->next;
1316                         dst_free(dst);
1317                 } else {
1318                         pprev = &dst->next;
1319                         ++more;
1320                 }
1321         }
1322
1323         spin_unlock_bh(&icmp6_dst_lock);
1324
1325         return more;
1326 }
1327
1328 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1329                             void *arg)
1330 {
1331         struct dst_entry *dst, **pprev;
1332
1333         spin_lock_bh(&icmp6_dst_lock);
1334         pprev = &icmp6_dst_gc_list;
1335         while ((dst = *pprev) != NULL) {
1336                 struct rt6_info *rt = (struct rt6_info *) dst;
1337                 if (func(rt, arg)) {
1338                         *pprev = dst->next;
1339                         dst_free(dst);
1340                 } else {
1341                         pprev = &dst->next;
1342                 }
1343         }
1344         spin_unlock_bh(&icmp6_dst_lock);
1345 }
1346
1347 static int ip6_dst_gc(struct dst_ops *ops)
1348 {
1349         unsigned long now = jiffies;
1350         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1351         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1352         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1353         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1354         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1355         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1356         int entries;
1357
1358         entries = dst_entries_get_fast(ops);
1359         if (time_after(rt_last_gc + rt_min_interval, now) &&
1360             entries <= rt_max_size)
1361                 goto out;
1362
1363         net->ipv6.ip6_rt_gc_expire++;
1364         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1365         net->ipv6.ip6_rt_last_gc = now;
1366         entries = dst_entries_get_slow(ops);
1367         if (entries < ops->gc_thresh)
1368                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1369 out:
1370         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1371         return entries > rt_max_size;
1372 }
1373
1374 int ip6_dst_hoplimit(struct dst_entry *dst)
1375 {
1376         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1377         if (hoplimit == 0) {
1378                 struct net_device *dev = dst->dev;
1379                 struct inet6_dev *idev;
1380
1381                 rcu_read_lock();
1382                 idev = __in6_dev_get(dev);
1383                 if (idev)
1384                         hoplimit = idev->cnf.hop_limit;
1385                 else
1386                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1387                 rcu_read_unlock();
1388         }
1389         return hoplimit;
1390 }
1391 EXPORT_SYMBOL(ip6_dst_hoplimit);
1392
1393 /*
1394  *
1395  */
1396
/*
 * ip6_route_add - build a route from @cfg and insert it into its FIB table.
 *
 * @cfg: route description.  Three fields are written back: fc_metric
 *       (defaulted to IP6_RT_PRIO_USER when 0), fc_protocol (defaulted to
 *       RTPROT_BOOT when RTPROT_UNSPEC) and fc_nlinfo.nl_net (set to the
 *       namespace of the final device).
 *
 * On the success path the device and inet6_dev references taken here are
 * stored in the route (rt->dst.dev / rt->rt6i_idev) and are not dropped
 * by this function; the return value is whatever __ip6_ins_rt() returns.
 * On every `goto out` path the references are dropped, the route (if
 * allocated) is released with dst_free(), and a negative errno is
 * returned: -EINVAL, -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH, or the
 * error from rt6_bind_neighbour().
 */
1397 int ip6_route_add(struct fib6_config *cfg)
1398 {
1399         int err;
1400         struct net *net = cfg->fc_nlinfo.nl_net;
1401         struct rt6_info *rt = NULL;
1402         struct net_device *dev = NULL;
1403         struct inet6_dev *idev = NULL;
1404         struct fib6_table *table;
1405         int addr_type;
1406
              /* Prefix lengths are bit counts of a 128-bit address. */
1407         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1408                 return -EINVAL;
1409 #ifndef CONFIG_IPV6_SUBTREES
              /* Source-prefix routes need CONFIG_IPV6_SUBTREES. */
1410         if (cfg->fc_src_len)
1411                 return -EINVAL;
1412 #endif
              /* An explicit ifindex must resolve to a device with an inet6_dev. */
1413         if (cfg->fc_ifindex) {
1414                 err = -ENODEV;
1415                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1416                 if (!dev)
1417                         goto out;
1418                 idev = in6_dev_get(dev);
1419                 if (!idev)
1420                         goto out;
1421         }
1422
1423         if (cfg->fc_metric == 0)
1424                 cfg->fc_metric = IP6_RT_PRIO_USER;
1425
              /*
               * Find the table.  A netlink request without NLM_F_CREATE first
               * tries an existing table; if there is none it warns and creates
               * the table anyway.  All other callers create on demand.
               */
1426         err = -ENOBUFS;
1427         if (cfg->fc_nlinfo.nlh &&
1428             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1429                 table = fib6_get_table(net, cfg->fc_table);
1430                 if (!table) {
1431                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1432                         table = fib6_new_table(net, cfg->fc_table);
1433                 }
1434         } else {
1435                 table = fib6_new_table(net, cfg->fc_table);
1436         }
1437
1438         if (!table)
1439                 goto out;
1440
1441         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1442
1443         if (!rt) {
1444                 err = -ENOMEM;
1445                 goto out;
1446         }
1447
              /* fc_expires is in clock_t units, relative to now. */
1448         if (cfg->fc_flags & RTF_EXPIRES)
1449                 rt6_set_expires(rt, jiffies +
1450                                 clock_t_to_jiffies(cfg->fc_expires));
1451         else
1452                 rt6_clean_expires(rt);
1453
1454         if (cfg->fc_protocol == RTPROT_UNSPEC)
1455                 cfg->fc_protocol = RTPROT_BOOT;
1456         rt->rt6i_protocol = cfg->fc_protocol;
1457
1458         addr_type = ipv6_addr_type(&cfg->fc_dst);
1459
              /* Input handler: multicast, local delivery, or forwarding. */
1460         if (addr_type & IPV6_ADDR_MULTICAST)
1461                 rt->dst.input = ip6_mc_input;
1462         else if (cfg->fc_flags & RTF_LOCAL)
1463                 rt->dst.input = ip6_input;
1464         else
1465                 rt->dst.input = ip6_forward;
1466
1467         rt->dst.output = ip6_output;
1468
1469         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1470         rt->rt6i_dst.plen = cfg->fc_dst_len;
1471         if (rt->rt6i_dst.plen == 128)
1472                rt->dst.flags |= DST_HOST;
1473
              /*
               * Non-host routes that carry metrics get a zeroed RTAX_MAX-sized
               * metrics array installed here; it is filled in at install_route.
               */
1474         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1475                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1476                 if (!metrics) {
1477                         err = -ENOMEM;
1478                         goto out;
1479                 }
1480                 dst_init_metrics(&rt->dst, metrics, 0);
1481         }
1482 #ifdef CONFIG_IPV6_SUBTREES
1483         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1484         rt->rt6i_src.plen = cfg->fc_src_len;
1485 #endif
1486
1487         rt->rt6i_metric = cfg->fc_metric;
1488
1489         /* We cannot add true routes via loopback here,
1490            they would result in kernel looping; promote them to reject routes
1491          */
1492         if ((cfg->fc_flags & RTF_REJECT) ||
1493             (dev && (dev->flags & IFF_LOOPBACK) &&
1494              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1495              !(cfg->fc_flags & RTF_LOCAL))) {
1496                 /* hold loopback dev/idev if we haven't done so. */
1497                 if (dev != net->loopback_dev) {
                              /* Swap any references held so far for loopback ones. */
1498                         if (dev) {
1499                                 dev_put(dev);
1500                                 in6_dev_put(idev);
1501                         }
1502                         dev = net->loopback_dev;
1503                         dev_hold(dev);
1504                         idev = in6_dev_get(dev);
1505                         if (!idev) {
1506                                 err = -ENODEV;
1507                                 goto out;
1508                         }
1509                 }
1510                 rt->dst.output = ip6_pkt_discard_out;
1511                 rt->dst.input = ip6_pkt_discard;
1512                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                      /* The route type selects the error stored in the dst. */
1513                 switch (cfg->fc_type) {
1514                 case RTN_BLACKHOLE:
1515                         rt->dst.error = -EINVAL;
1516                         break;
1517                 case RTN_PROHIBIT:
1518                         rt->dst.error = -EACCES;
1519                         break;
1520                 case RTN_THROW:
1521                         rt->dst.error = -EAGAIN;
1522                         break;
1523                 default:
1524                         rt->dst.error = -ENETUNREACH;
1525                         break;
1526                 }
1527                 goto install_route;
1528         }
1529
1530         if (cfg->fc_flags & RTF_GATEWAY) {
1531                 const struct in6_addr *gw_addr;
1532                 int gwa_type;
1533
1534                 gw_addr = &cfg->fc_gateway;
1535                 rt->rt6i_gateway = *gw_addr;
1536                 gwa_type = ipv6_addr_type(gw_addr);
1537
1538                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1539                         struct rt6_info *grt;
1540
1541                         /* IPv6 strictly inhibits using not link-local
1542                            addresses as nexthop address.
1543                            Otherwise, router will not able to send redirects.
1544                            It is very good, but in some (rare!) circumstances
1545                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1546                            some exceptions. --ANK
1547                          */
1548                         err = -EINVAL;
1549                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1550                                 goto out;
1551
                              /* The gateway itself must be reachable via some route. */
1552                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1553
1554                         err = -EHOSTUNREACH;
1555                         if (!grt)
1556                                 goto out;
1557                         if (dev) {
                                      /* A requested device must match the gateway route's. */
1558                                 if (dev != grt->dst.dev) {
1559                                         ip6_rt_put(grt);
1560                                         goto out;
1561                                 }
1562                         } else {
                                      /* No device given: adopt the gateway route's device. */
1563                                 dev = grt->dst.dev;
1564                                 idev = grt->rt6i_idev;
1565                                 dev_hold(dev);
1566                                 in6_dev_hold(grt->rt6i_idev);
1567                         }
                              /* Accept only if the gateway route is not itself via a gateway. */
1568                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1569                                 err = 0;
1570                         ip6_rt_put(grt);
1571
1572                         if (err)
1573                                 goto out;
1574                 }
1575                 err = -EINVAL;
1576                 if (!dev || (dev->flags & IFF_LOOPBACK))
1577                         goto out;
1578         }
1579
1580         err = -ENODEV;
1581         if (!dev)
1582                 goto out;
1583
              /* A preferred source address must pass ipv6_chk_addr() on dev. */
1584         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1585                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1586                         err = -EINVAL;
1587                         goto out;
1588                 }
1589                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1590                 rt->rt6i_prefsrc.plen = 128;
1591         } else
1592                 rt->rt6i_prefsrc.plen = 0;
1593
1594         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1595                 err = rt6_bind_neighbour(rt, dev);
1596                 if (err)
1597                         goto out;
1598         }
1599
1600         rt->rt6i_flags = cfg->fc_flags;
1601
1602 install_route:
              /*
               * Copy metrics out of the attribute stream: type 0 is skipped,
               * anything above RTAX_MAX rejects the whole route.
               */
1603         if (cfg->fc_mx) {
1604                 struct nlattr *nla;
1605                 int remaining;
1606
1607                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1608                         int type = nla_type(nla);
1609
1610                         if (type) {
1611                                 if (type > RTAX_MAX) {
1612                                         err = -EINVAL;
1613                                         goto out;
1614                                 }
1615
1616                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1617                         }
1618                 }
1619         }
1620
              /* The dev/idev references are handed over to the route here. */
1621         rt->dst.dev = dev;
1622         rt->rt6i_idev = idev;
1623         rt->rt6i_table = table;
1624
1625         cfg->fc_nlinfo.nl_net = dev_net(dev);
1626
1627         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1628
1629 out:
              /* Error unwind: drop whatever was acquired before the failure. */
1630         if (dev)
1631                 dev_put(dev);
1632         if (idev)
1633                 in6_dev_put(idev);
1634         if (rt)
1635                 dst_free(&rt->dst);
1636         return err;
1637 }
1638
1639 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1640 {
1641         int err;
1642         struct fib6_table *table;
1643         struct net *net = dev_net(rt->dst.dev);
1644
1645         if (rt == net->ipv6.ip6_null_entry) {
1646                 err = -ENOENT;
1647                 goto out;
1648         }
1649
1650         table = rt->rt6i_table;
1651         write_lock_bh(&table->tb6_lock);
1652         err = fib6_del(rt, info);
1653         write_unlock_bh(&table->tb6_lock);
1654
1655 out:
1656         ip6_rt_put(rt);
1657         return err;
1658 }
1659
1660 int ip6_del_rt(struct rt6_info *rt)
1661 {
1662         struct nl_info info = {
1663                 .nl_net = dev_net(rt->dst.dev),
1664         };
1665         return __ip6_del_rt(rt, &info);
1666 }
1667
1668 static int ip6_route_del(struct fib6_config *cfg)
1669 {
1670         struct fib6_table *table;
1671         struct fib6_node *fn;
1672         struct rt6_info *rt;
1673         int err = -ESRCH;
1674
1675         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1676         if (!table)
1677                 return err;
1678
1679         read_lock_bh(&table->tb6_lock);
1680
1681         fn = fib6_locate(&table->tb6_root,
1682                          &cfg->fc_dst, cfg->fc_dst_len,
1683                          &cfg->fc_src, cfg->fc_src_len);
1684
1685         if (fn) {
1686                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1687                         if (cfg->fc_ifindex &&
1688                             (!rt->dst.dev ||
1689                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1690                                 continue;
1691                         if (cfg->fc_flags & RTF_GATEWAY &&
1692                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1693                                 continue;
1694                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1695                                 continue;
1696                         dst_hold(&rt->dst);
1697                         read_unlock_bh(&table->tb6_lock);
1698
1699                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1700                 }
1701         }
1702         read_unlock_bh(&table->tb6_lock);
1703
1704         return err;
1705 }
1706
1707 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1708 {
1709         struct net *net = dev_net(skb->dev);
1710         struct netevent_redirect netevent;
1711         struct rt6_info *rt, *nrt = NULL;
1712         const struct in6_addr *target;
1713         struct ndisc_options ndopts;
1714         const struct in6_addr *dest;
1715         struct neighbour *old_neigh;
1716         struct inet6_dev *in6_dev;
1717         struct neighbour *neigh;
1718         struct icmp6hdr *icmph;
1719         int optlen, on_link;
1720         u8 *lladdr;
1721
1722         optlen = skb->tail - skb->transport_header;
1723         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1724
1725         if (optlen < 0) {
1726                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1727                 return;
1728         }
1729
1730         icmph = icmp6_hdr(skb);
1731         target = (const struct in6_addr *) (icmph + 1);
1732         dest = target + 1;
1733
1734         if (ipv6_addr_is_multicast(dest)) {
1735                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1736                 return;
1737         }
1738
1739         on_link = 0;
1740         if (ipv6_addr_equal(dest, target)) {
1741                 on_link = 1;
1742         } else if (ipv6_addr_type(target) !=
1743                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1744                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1745                 return;
1746         }
1747
1748         in6_dev = __in6_dev_get(skb->dev);
1749         if (!in6_dev)
1750                 return;
1751         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1752                 return;
1753
1754         /* RFC2461 8.1:
1755          *      The IP source address of the Redirect MUST be the same as the current
1756          *      first-hop router for the specified ICMP Destination Address.
1757          */
1758
1759         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1760                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1761                 return;
1762         }
1763
1764         lladdr = NULL;
1765         if (ndopts.nd_opts_tgt_lladdr) {
1766                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1767                                              skb->dev);
1768                 if (!lladdr) {
1769                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1770                         return;
1771                 }
1772         }
1773
1774         rt = (struct rt6_info *) dst;
1775         if (rt == net->ipv6.ip6_null_entry) {
1776                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1777                 return;
1778         }
1779
1780         /* Redirect received -> path was valid.
1781          * Look, redirects are sent only in response to data packets,
1782          * so that this nexthop apparently is reachable. --ANK
1783          */
1784         dst_confirm(&rt->dst);
1785
1786         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1787         if (!neigh)
1788                 return;
1789
1790         /* Duplicate redirect: silently ignore. */
1791         old_neigh = rt->n;
1792         if (neigh == old_neigh)
1793                 goto out;
1794
1795         /*
1796          *      We have finally decided to accept it.
1797          */
1798
1799         neigh_update(neigh, lladdr, NUD_STALE,
1800                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1801                      NEIGH_UPDATE_F_OVERRIDE|
1802                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1803                                      NEIGH_UPDATE_F_ISROUTER))
1804                      );
1805
1806         nrt = ip6_rt_copy(rt, dest);
1807         if (!nrt)
1808                 goto out;
1809
1810         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1811         if (on_link)
1812                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1813
1814         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1815         nrt->n = neigh_clone(neigh);
1816
1817         if (ip6_ins_rt(nrt))
1818                 goto out;
1819
1820         netevent.old = &rt->dst;
1821         netevent.old_neigh = old_neigh;
1822         netevent.new = &nrt->dst;
1823         netevent.new_neigh = neigh;
1824         netevent.daddr = dest;
1825         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1826
1827         if (rt->rt6i_flags & RTF_CACHE) {
1828                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1829                 ip6_del_rt(rt);
1830         }
1831
1832 out:
1833         neigh_release(neigh);
1834 }
1835
1836 /*
1837  *      Misc support functions
1838  */
1839
1840 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1841                                     const struct in6_addr *dest)
1842 {
1843         struct net *net = dev_net(ort->dst.dev);
1844         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1845                                             ort->rt6i_table);
1846
1847         if (rt) {
1848                 rt->dst.input = ort->dst.input;
1849                 rt->dst.output = ort->dst.output;
1850                 rt->dst.flags |= DST_HOST;
1851
1852                 rt->rt6i_dst.addr = *dest;
1853                 rt->rt6i_dst.plen = 128;
1854                 dst_copy_metrics(&rt->dst, &ort->dst);
1855                 rt->dst.error = ort->dst.error;
1856                 rt->rt6i_idev = ort->rt6i_idev;
1857                 if (rt->rt6i_idev)
1858                         in6_dev_hold(rt->rt6i_idev);
1859                 rt->dst.lastuse = jiffies;
1860
1861                 rt->rt6i_gateway = ort->rt6i_gateway;
1862                 rt->rt6i_flags = ort->rt6i_flags;
1863                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1864                     (RTF_DEFAULT | RTF_ADDRCONF))
1865                         rt6_set_from(rt, ort);
1866                 else
1867                         rt6_clean_expires(rt);
1868                 rt->rt6i_metric = 0;
1869
1870 #ifdef CONFIG_IPV6_SUBTREES
1871                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1872 #endif
1873                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1874                 rt->rt6i_table = ort->rt6i_table;
1875         }
1876         return rt;
1877 }
1878
1879 #ifdef CONFIG_IPV6_ROUTE_INFO
1880 static struct rt6_info *rt6_get_route_info(struct net *net,
1881                                            const struct in6_addr *prefix, int prefixlen,
1882                                            const struct in6_addr *gwaddr, int ifindex)
1883 {
1884         struct fib6_node *fn;
1885         struct rt6_info *rt = NULL;
1886         struct fib6_table *table;
1887
1888         table = fib6_get_table(net, RT6_TABLE_INFO);
1889         if (!table)
1890                 return NULL;
1891
1892         read_lock_bh(&table->tb6_lock);
1893         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1894         if (!fn)
1895                 goto out;
1896
1897         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1898                 if (rt->dst.dev->ifindex != ifindex)
1899                         continue;
1900                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1901                         continue;
1902                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1903                         continue;
1904                 dst_hold(&rt->dst);
1905                 break;
1906         }
1907 out:
1908         read_unlock_bh(&table->tb6_lock);
1909         return rt;
1910 }
1911
1912 static struct rt6_info *rt6_add_route_info(struct net *net,
1913                                            const struct in6_addr *prefix, int prefixlen,
1914                                            const struct in6_addr *gwaddr, int ifindex,
1915                                            unsigned int pref)
1916 {
1917         struct fib6_config cfg = {
1918                 .fc_table       = RT6_TABLE_INFO,
1919                 .fc_metric      = IP6_RT_PRIO_USER,
1920                 .fc_ifindex     = ifindex,
1921                 .fc_dst_len     = prefixlen,
1922                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1923                                   RTF_UP | RTF_PREF(pref),
1924                 .fc_nlinfo.portid = 0,
1925                 .fc_nlinfo.nlh = NULL,
1926                 .fc_nlinfo.nl_net = net,
1927         };
1928
1929         cfg.fc_dst = *prefix;
1930         cfg.fc_gateway = *gwaddr;
1931
1932         /* We should treat it as a default route if prefix length is 0. */
1933         if (!prefixlen)
1934                 cfg.fc_flags |= RTF_DEFAULT;
1935
1936         ip6_route_add(&cfg);
1937
1938         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1939 }
1940 #endif
1941
1942 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1943 {
1944         struct rt6_info *rt;
1945         struct fib6_table *table;
1946
1947         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1948         if (!table)
1949                 return NULL;
1950
1951         read_lock_bh(&table->tb6_lock);
1952         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1953                 if (dev == rt->dst.dev &&
1954                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1955                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1956                         break;
1957         }
1958         if (rt)
1959                 dst_hold(&rt->dst);
1960         read_unlock_bh(&table->tb6_lock);
1961         return rt;
1962 }
1963
1964 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1965                                      struct net_device *dev,
1966                                      unsigned int pref)
1967 {
1968         struct fib6_config cfg = {
1969                 .fc_table       = RT6_TABLE_DFLT,
1970                 .fc_metric      = IP6_RT_PRIO_USER,
1971                 .fc_ifindex     = dev->ifindex,
1972                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1973                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1974                 .fc_nlinfo.portid = 0,
1975                 .fc_nlinfo.nlh = NULL,
1976                 .fc_nlinfo.nl_net = dev_net(dev),
1977         };
1978
1979         cfg.fc_gateway = *gwaddr;
1980
1981         ip6_route_add(&cfg);
1982
1983         return rt6_get_dflt_router(gwaddr, dev);
1984 }
1985
1986 void rt6_purge_dflt_routers(struct net *net)
1987 {
1988         struct rt6_info *rt;
1989         struct fib6_table *table;
1990
1991         /* NOTE: Keep consistent with rt6_get_dflt_router */
1992         table = fib6_get_table(net, RT6_TABLE_DFLT);
1993         if (!table)
1994                 return;
1995
1996 restart:
1997         read_lock_bh(&table->tb6_lock);
1998         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1999                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
2000                         dst_hold(&rt->dst);
2001                         read_unlock_bh(&table->tb6_lock);
2002                         ip6_del_rt(rt);
2003                         goto restart;
2004                 }
2005         }
2006         read_unlock_bh(&table->tb6_lock);
2007 }
2008
2009 static void rtmsg_to_fib6_config(struct net *net,
2010                                  struct in6_rtmsg *rtmsg,
2011                                  struct fib6_config *cfg)
2012 {
2013         memset(cfg, 0, sizeof(*cfg));
2014
2015         cfg->fc_table = RT6_TABLE_MAIN;
2016         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2017         cfg->fc_metric = rtmsg->rtmsg_metric;
2018         cfg->fc_expires = rtmsg->rtmsg_info;
2019         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2020         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2021         cfg->fc_flags = rtmsg->rtmsg_flags;
2022
2023         cfg->fc_nlinfo.nl_net = net;
2024
2025         cfg->fc_dst = rtmsg->rtmsg_dst;
2026         cfg->fc_src = rtmsg->rtmsg_src;
2027         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2028 }
2029
2030 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2031 {
2032         struct fib6_config cfg;
2033         struct in6_rtmsg rtmsg;
2034         int err;
2035
2036         switch(cmd) {
2037         case SIOCADDRT:         /* Add a route */
2038         case SIOCDELRT:         /* Delete a route */
2039                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2040                         return -EPERM;
2041                 err = copy_from_user(&rtmsg, arg,
2042                                      sizeof(struct in6_rtmsg));
2043                 if (err)
2044                         return -EFAULT;
2045
2046                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2047
2048                 rtnl_lock();
2049                 switch (cmd) {
2050                 case SIOCADDRT:
2051                         err = ip6_route_add(&cfg);
2052                         break;
2053                 case SIOCDELRT:
2054                         err = ip6_route_del(&cfg);
2055                         break;
2056                 default:
2057                         err = -EINVAL;
2058                 }
2059                 rtnl_unlock();
2060
2061                 return err;
2062         }
2063
2064         return -EINVAL;
2065 }
2066
2067 /*
2068  *      Drop the packet on the floor
2069  */
2070
2071 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2072 {
2073         int type;
2074         struct dst_entry *dst = skb_dst(skb);
2075         switch (ipstats_mib_noroutes) {
2076         case IPSTATS_MIB_INNOROUTES:
2077                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2078                 if (type == IPV6_ADDR_ANY) {
2079                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2080                                       IPSTATS_MIB_INADDRERRORS);
2081                         break;
2082                 }
2083                 /* FALLTHROUGH */
2084         case IPSTATS_MIB_OUTNOROUTES:
2085                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2086                               ipstats_mib_noroutes);
2087                 break;
2088         }
2089         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2090         kfree_skb(skb);
2091         return 0;
2092 }
2093
2094 static int ip6_pkt_discard(struct sk_buff *skb)
2095 {
2096         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2097 }
2098
2099 static int ip6_pkt_discard_out(struct sk_buff *skb)
2100 {
2101         skb->dev = skb_dst(skb)->dev;
2102         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2103 }
2104
2105 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2106
2107 static int ip6_pkt_prohibit(struct sk_buff *skb)
2108 {
2109         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2110 }
2111
2112 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2113 {
2114         skb->dev = skb_dst(skb)->dev;
2115         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2116 }
2117
2118 #endif
2119
2120 /*
2121  *      Allocate a dst for local (unicast / anycast) address.
2122  */
2123
2124 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2125                                     const struct in6_addr *addr,
2126                                     bool anycast)
2127 {
2128         struct net *net = dev_net(idev->dev);
2129         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2130         int err;
2131
2132         if (!rt) {
2133                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2134                 return ERR_PTR(-ENOMEM);
2135         }
2136
2137         in6_dev_hold(idev);
2138
2139         rt->dst.flags |= DST_HOST;
2140         rt->dst.input = ip6_input;
2141         rt->dst.output = ip6_output;
2142         rt->rt6i_idev = idev;
2143
2144         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2145         if (anycast)
2146                 rt->rt6i_flags |= RTF_ANYCAST;
2147         else
2148                 rt->rt6i_flags |= RTF_LOCAL;
2149         err = rt6_bind_neighbour(rt, rt->dst.dev);
2150         if (err) {
2151                 dst_free(&rt->dst);
2152                 return ERR_PTR(err);
2153         }
2154
2155         rt->rt6i_dst.addr = *addr;
2156         rt->rt6i_dst.plen = 128;
2157         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2158
2159         atomic_set(&rt->dst.__refcnt, 1);
2160
2161         return rt;
2162 }
2163
2164 int ip6_route_get_saddr(struct net *net,
2165                         struct rt6_info *rt,
2166                         const struct in6_addr *daddr,
2167                         unsigned int prefs,
2168                         struct in6_addr *saddr)
2169 {
2170         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2171         int err = 0;
2172         if (rt->rt6i_prefsrc.plen)
2173                 *saddr = rt->rt6i_prefsrc.addr;
2174         else
2175                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2176                                          daddr, prefs, saddr);
2177         return err;
2178 }
2179
2180 /* remove deleted ip from prefsrc entries */
/*
 *	Argument bundle handed to fib6_remove_prefsrc() through
 *	fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared with the prefsrc */
};
2186
2187 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2188 {
2189         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2190         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2191         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2192
2193         if (((void *)rt->dst.dev == dev || !dev) &&
2194             rt != net->ipv6.ip6_null_entry &&
2195             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2196                 /* remove prefsrc entry */
2197                 rt->rt6i_prefsrc.plen = 0;
2198         }
2199         return 0;
2200 }
2201
2202 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2203 {
2204         struct net *net = dev_net(ifp->idev->dev);
2205         struct arg_dev_net_ip adni = {
2206                 .dev = ifp->idev->dev,
2207                 .net = net,
2208                 .addr = &ifp->addr,
2209         };
2210         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2211 }
2212
/*
 *	Argument bundle handed to fib6_ifdown() by rt6_ifdown().
 */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
};
2217
2218 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2219 {
2220         const struct arg_dev_net *adn = arg;
2221         const struct net_device *dev = adn->dev;
2222
2223         if ((rt->dst.dev == dev || !dev) &&
2224             rt != adn->net->ipv6.ip6_null_entry)
2225                 return -1;
2226
2227         return 0;
2228 }
2229
2230 void rt6_ifdown(struct net *net, struct net_device *dev)
2231 {
2232         struct arg_dev_net adn = {
2233                 .dev = dev,
2234                 .net = net,
2235         };
2236
2237         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2238         icmp6_clean_all(fib6_ifdown, &adn);
2239 }
2240
/*
 *	Argument bundle handed to rt6_mtu_change_route() by
 *	rt6_mtu_change().
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2245
2246 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2247 {
2248         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2249         struct inet6_dev *idev;
2250
2251         /* In IPv6 pmtu discovery is not optional,
2252            so that RTAX_MTU lock cannot disable it.
2253            We still use this lock to block changes
2254            caused by addrconf/ndisc.
2255         */
2256
2257         idev = __in6_dev_get(arg->dev);
2258         if (!idev)
2259                 return 0;
2260
2261         /* For administrative MTU increase, there is no way to discover
2262            IPv6 PMTU increase, so PMTU increase should be updated here.
2263            Since RFC 1981 doesn't include administrative MTU increase
2264            update PMTU increase is a MUST. (i.e. jumbo frame)
2265          */
2266         /*
2267            If new MTU is less than route PMTU, this new MTU will be the
2268            lowest MTU in the path, update the route PMTU to reflect PMTU
2269            decreases; if new MTU is greater than route PMTU, and the
2270            old MTU is the lowest MTU in the path, update the route PMTU
2271            to reflect the increase. In this case if the other nodes' MTU
2272            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2273            PMTU discouvery.
2274          */
2275         if (rt->dst.dev == arg->dev &&
2276             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2277             (dst_mtu(&rt->dst) >= arg->mtu ||
2278              (dst_mtu(&rt->dst) < arg->mtu &&
2279               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2280                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2281         }
2282         return 0;
2283 }
2284
2285 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2286 {
2287         struct rt6_mtu_change_arg arg = {
2288                 .dev = dev,
2289                 .mtu = mtu,
2290         };
2291
2292         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2293 }
2294
2295 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2296         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2297         [RTA_OIF]               = { .type = NLA_U32 },
2298         [RTA_IIF]               = { .type = NLA_U32 },
2299         [RTA_PRIORITY]          = { .type = NLA_U32 },
2300         [RTA_METRICS]           = { .type = NLA_NESTED },
2301         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2302 };
2303
2304 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2305                               struct fib6_config *cfg)
2306 {
2307         struct rtmsg *rtm;
2308         struct nlattr *tb[RTA_MAX+1];
2309         int err;
2310
2311         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2312         if (err < 0)
2313                 goto errout;
2314
2315         err = -EINVAL;
2316         rtm = nlmsg_data(nlh);
2317         memset(cfg, 0, sizeof(*cfg));
2318
2319         cfg->fc_table = rtm->rtm_table;
2320         cfg->fc_dst_len = rtm->rtm_dst_len;
2321         cfg->fc_src_len = rtm->rtm_src_len;
2322         cfg->fc_flags = RTF_UP;
2323         cfg->fc_protocol = rtm->rtm_protocol;
2324         cfg->fc_type = rtm->rtm_type;
2325
2326         if (rtm->rtm_type == RTN_UNREACHABLE ||
2327             rtm->rtm_type == RTN_BLACKHOLE ||
2328             rtm->rtm_type == RTN_PROHIBIT ||
2329             rtm->rtm_type == RTN_THROW)
2330                 cfg->fc_flags |= RTF_REJECT;
2331
2332         if (rtm->rtm_type == RTN_LOCAL)
2333                 cfg->fc_flags |= RTF_LOCAL;
2334
2335         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2336         cfg->fc_nlinfo.nlh = nlh;
2337         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2338
2339         if (tb[RTA_GATEWAY]) {
2340                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2341                 cfg->fc_flags |= RTF_GATEWAY;
2342         }
2343
2344         if (tb[RTA_DST]) {
2345                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2346
2347                 if (nla_len(tb[RTA_DST]) < plen)
2348                         goto errout;
2349
2350                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2351         }
2352
2353         if (tb[RTA_SRC]) {
2354                 int plen = (rtm->rtm_src_len + 7) >> 3;
2355
2356                 if (nla_len(tb[RTA_SRC]) < plen)
2357                         goto errout;
2358
2359                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2360         }
2361
2362         if (tb[RTA_PREFSRC])
2363                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2364
2365         if (tb[RTA_OIF])
2366                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2367
2368         if (tb[RTA_PRIORITY])
2369                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2370
2371         if (tb[RTA_METRICS]) {
2372                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2373                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2374         }
2375
2376         if (tb[RTA_TABLE])
2377                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2378
2379         if (tb[RTA_MULTIPATH]) {
2380                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2381                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2382         }
2383
2384         err = 0;
2385 errout:
2386         return err;
2387 }
2388
2389 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2390 {
2391         struct fib6_config r_cfg;
2392         struct rtnexthop *rtnh;
2393         int remaining;
2394         int attrlen;
2395         int err = 0, last_err = 0;
2396
2397 beginning:
2398         rtnh = (struct rtnexthop *)cfg->fc_mp;
2399         remaining = cfg->fc_mp_len;
2400
2401         /* Parse a Multipath Entry */
2402         while (rtnh_ok(rtnh, remaining)) {
2403                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2404                 if (rtnh->rtnh_ifindex)
2405                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2406
2407                 attrlen = rtnh_attrlen(rtnh);
2408                 if (attrlen > 0) {
2409                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2410
2411                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2412                         if (nla) {
2413                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2414                                 r_cfg.fc_flags |= RTF_GATEWAY;
2415                         }
2416                 }
2417                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2418                 if (err) {
2419                         last_err = err;
2420                         /* If we are trying to remove a route, do not stop the
2421                          * loop when ip6_route_del() fails (because next hop is
2422                          * already gone), we should try to remove all next hops.
2423                          */
2424                         if (add) {
2425                                 /* If add fails, we should try to delete all
2426                                  * next hops that have been already added.
2427                                  */
2428                                 add = 0;
2429                                 goto beginning;
2430                         }
2431                 }
2432                 /* Because each route is added like a single route we remove
2433                  * this flag after the first nexthop (if there is a collision,
2434                  * we have already fail to add the first nexthop:
2435                  * fib6_add_rt2node() has reject it).
2436                  */
2437                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2438                 rtnh = rtnh_next(rtnh, &remaining);
2439         }
2440
2441         return last_err;
2442 }
2443
2444 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2445 {
2446         struct fib6_config cfg;
2447         int err;
2448
2449         err = rtm_to_fib6_config(skb, nlh, &cfg);
2450         if (err < 0)
2451                 return err;
2452
2453         if (cfg.fc_mp)
2454                 return ip6_route_multipath(&cfg, 0);
2455         else
2456                 return ip6_route_del(&cfg);
2457 }
2458
2459 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2460 {
2461         struct fib6_config cfg;
2462         int err;
2463
2464         err = rtm_to_fib6_config(skb, nlh, &cfg);
2465         if (err < 0)
2466                 return err;
2467
2468         if (cfg.fc_mp)
2469                 return ip6_route_multipath(&cfg, 1);
2470         else
2471                 return ip6_route_add(&cfg);
2472 }
2473
2474 static inline size_t rt6_nlmsg_size(void)
2475 {
2476         return NLMSG_ALIGN(sizeof(struct rtmsg))
2477                + nla_total_size(16) /* RTA_SRC */
2478                + nla_total_size(16) /* RTA_DST */
2479                + nla_total_size(16) /* RTA_GATEWAY */
2480                + nla_total_size(16) /* RTA_PREFSRC */
2481                + nla_total_size(4) /* RTA_TABLE */
2482                + nla_total_size(4) /* RTA_IIF */
2483                + nla_total_size(4) /* RTA_OIF */
2484                + nla_total_size(4) /* RTA_PRIORITY */
2485                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2486                + nla_total_size(sizeof(struct rta_cacheinfo));
2487 }
2488
2489 static int rt6_fill_node(struct net *net,
2490                          struct sk_buff *skb, struct rt6_info *rt,
2491                          struct in6_addr *dst, struct in6_addr *src,
2492                          int iif, int type, u32 portid, u32 seq,
2493                          int prefix, int nowait, unsigned int flags)
2494 {
2495         struct rtmsg *rtm;
2496         struct nlmsghdr *nlh;
2497         long expires;
2498         u32 table;
2499         struct neighbour *n;
2500
2501         if (prefix) {   /* user wants prefix routes only */
2502                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2503                         /* success since this is not a prefix route */
2504                         return 1;
2505                 }
2506         }
2507
2508         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2509         if (!nlh)
2510                 return -EMSGSIZE;
2511
2512         rtm = nlmsg_data(nlh);
2513         rtm->rtm_family = AF_INET6;
2514         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2515         rtm->rtm_src_len = rt->rt6i_src.plen;
2516         rtm->rtm_tos = 0;
2517         if (rt->rt6i_table)
2518                 table = rt->rt6i_table->tb6_id;
2519         else
2520                 table = RT6_TABLE_UNSPEC;
2521         rtm->rtm_table = table;
2522         if (nla_put_u32(skb, RTA_TABLE, table))
2523                 goto nla_put_failure;
2524         if (rt->rt6i_flags & RTF_REJECT) {
2525                 switch (rt->dst.error) {
2526                 case -EINVAL:
2527                         rtm->rtm_type = RTN_BLACKHOLE;
2528                         break;
2529                 case -EACCES:
2530                         rtm->rtm_type = RTN_PROHIBIT;
2531                         break;
2532                 case -EAGAIN:
2533                         rtm->rtm_type = RTN_THROW;
2534                         break;
2535                 default:
2536                         rtm->rtm_type = RTN_UNREACHABLE;
2537                         break;
2538                 }
2539         }
2540         else if (rt->rt6i_flags & RTF_LOCAL)
2541                 rtm->rtm_type = RTN_LOCAL;
2542         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2543                 rtm->rtm_type = RTN_LOCAL;
2544         else
2545                 rtm->rtm_type = RTN_UNICAST;
2546         rtm->rtm_flags = 0;
2547         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2548         rtm->rtm_protocol = rt->rt6i_protocol;
2549         if (rt->rt6i_flags & RTF_DYNAMIC)
2550                 rtm->rtm_protocol = RTPROT_REDIRECT;
2551         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2552                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2553                         rtm->rtm_protocol = RTPROT_RA;
2554                 else
2555                         rtm->rtm_protocol = RTPROT_KERNEL;
2556         }
2557
2558         if (rt->rt6i_flags & RTF_CACHE)
2559                 rtm->rtm_flags |= RTM_F_CLONED;
2560
2561         if (dst) {
2562                 if (nla_put(skb, RTA_DST, 16, dst))
2563                         goto nla_put_failure;
2564                 rtm->rtm_dst_len = 128;
2565         } else if (rtm->rtm_dst_len)
2566                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2567                         goto nla_put_failure;
2568 #ifdef CONFIG_IPV6_SUBTREES
2569         if (src) {
2570                 if (nla_put(skb, RTA_SRC, 16, src))
2571                         goto nla_put_failure;
2572                 rtm->rtm_src_len = 128;
2573         } else if (rtm->rtm_src_len &&
2574                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2575                 goto nla_put_failure;
2576 #endif
2577         if (iif) {
2578 #ifdef CONFIG_IPV6_MROUTE
2579                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2580                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2581                         if (err <= 0) {
2582                                 if (!nowait) {
2583                                         if (err == 0)
2584                                                 return 0;
2585                                         goto nla_put_failure;
2586                                 } else {
2587                                         if (err == -EMSGSIZE)
2588                                                 goto nla_put_failure;
2589                                 }
2590                         }
2591                 } else
2592 #endif
2593                         if (nla_put_u32(skb, RTA_IIF, iif))
2594                                 goto nla_put_failure;
2595         } else if (dst) {
2596                 struct in6_addr saddr_buf;
2597                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2598                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2599                         goto nla_put_failure;
2600         }
2601
2602         if (rt->rt6i_prefsrc.plen) {
2603                 struct in6_addr saddr_buf;
2604                 saddr_buf = rt->rt6i_prefsrc.addr;
2605                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2606                         goto nla_put_failure;
2607         }
2608
2609         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2610                 goto nla_put_failure;
2611
2612         n = rt->n;
2613         if (n) {
2614                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2615                         goto nla_put_failure;
2616         }
2617
2618         if (rt->dst.dev &&
2619             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2620                 goto nla_put_failure;
2621         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2622                 goto nla_put_failure;
2623
2624         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2625
2626         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2627                 goto nla_put_failure;
2628
2629         return nlmsg_end(skb, nlh);
2630
2631 nla_put_failure:
2632         nlmsg_cancel(skb, nlh);
2633         return -EMSGSIZE;
2634 }
2635
2636 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2637 {
2638         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2639         int prefix;
2640
2641         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2642                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2643                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2644         } else
2645                 prefix = 0;
2646
2647         return rt6_fill_node(arg->net,
2648                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2649                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2650                      prefix, 0, NLM_F_MULTI);
2651 }
2652
2653 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2654 {
2655         struct net *net = sock_net(in_skb->sk);
2656         struct nlattr *tb[RTA_MAX+1];
2657         struct rt6_info *rt;
2658         struct sk_buff *skb;
2659         struct rtmsg *rtm;
2660         struct flowi6 fl6;
2661         int err, iif = 0, oif = 0;
2662
2663         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2664         if (err < 0)
2665                 goto errout;
2666
2667         err = -EINVAL;
2668         memset(&fl6, 0, sizeof(fl6));
2669
2670         if (tb[RTA_SRC]) {
2671                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2672                         goto errout;
2673
2674                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2675         }
2676
2677         if (tb[RTA_DST]) {
2678                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2679                         goto errout;
2680
2681                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2682         }
2683
2684         if (tb[RTA_IIF])
2685                 iif = nla_get_u32(tb[RTA_IIF]);
2686
2687         if (tb[RTA_OIF])
2688                 oif = nla_get_u32(tb[RTA_OIF]);
2689
2690         if (iif) {
2691                 struct net_device *dev;
2692                 int flags = 0;
2693
2694                 dev = __dev_get_by_index(net, iif);
2695                 if (!dev) {
2696                         err = -ENODEV;
2697                         goto errout;
2698                 }
2699
2700                 fl6.flowi6_iif = iif;
2701
2702                 if (!ipv6_addr_any(&fl6.saddr))
2703                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2704
2705                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2706                                                                flags);
2707         } else {
2708                 fl6.flowi6_oif = oif;
2709
2710                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2711         }
2712
2713         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2714         if (!skb) {
2715                 ip6_rt_put(rt);
2716                 err = -ENOBUFS;
2717                 goto errout;
2718         }
2719
2720         /* Reserve room for dummy headers, this skb can pass
2721            through good chunk of routing engine.
2722          */
2723         skb_reset_mac_header(skb);
2724         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2725
2726         skb_dst_set(skb, &rt->dst);
2727
2728         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2729                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2730                             nlh->nlmsg_seq, 0, 0, 0);
2731         if (err < 0) {
2732                 kfree_skb(skb);
2733                 goto errout;
2734         }
2735
2736         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2737 errout:
2738         return err;
2739 }
2740
2741 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2742 {
2743         struct sk_buff *skb;
2744         struct net *net = info->nl_net;
2745         u32 seq;
2746         int err;
2747
2748         err = -ENOBUFS;
2749         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2750
2751         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2752         if (!skb)
2753                 goto errout;
2754
2755         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2756                                 event, info->portid, seq, 0, 0, 0);
2757         if (err < 0) {
2758                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2759                 WARN_ON(err == -EMSGSIZE);
2760                 kfree_skb(skb);
2761                 goto errout;
2762         }
2763         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2764                     info->nlh, gfp_any());
2765         return;
2766 errout:
2767         if (err < 0)
2768                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2769 }
2770
2771 static int ip6_route_dev_notify(struct notifier_block *this,
2772                                 unsigned long event, void *data)
2773 {
2774         struct net_device *dev = (struct net_device *)data;
2775         struct net *net = dev_net(dev);
2776
2777         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2778                 net->ipv6.ip6_null_entry->dst.dev = dev;
2779                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2780 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2781                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2782                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2783                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2784                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2785 #endif
2786         }
2787
2788         return NOTIFY_OK;
2789 }
2790
2791 /*
2792  *      /proc
2793  */
2794
2795 #ifdef CONFIG_PROC_FS
2796
/*
 * NOTE(review): nothing in the visible part of this file uses this
 * struct (the /proc handlers below take a struct seq_file instead);
 * presumably a leftover from an older buffer-based /proc reader.
 * Confirm there are no other users before removing it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2805
2806 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2807 {
2808         struct seq_file *m = p_arg;
2809         struct neighbour *n;
2810
2811         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2812
2813 #ifdef CONFIG_IPV6_SUBTREES
2814         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2815 #else
2816         seq_puts(m, "00000000000000000000000000000000 00 ");
2817 #endif
2818         n = rt->n;
2819         if (n) {
2820                 seq_printf(m, "%pi6", n->primary_key);
2821         } else {
2822                 seq_puts(m, "00000000000000000000000000000000");
2823         }
2824         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2825                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2826                    rt->dst.__use, rt->rt6i_flags,
2827                    rt->dst.dev ? rt->dst.dev->name : "");
2828         return 0;
2829 }
2830
2831 static int ipv6_route_show(struct seq_file *m, void *v)
2832 {
2833         struct net *net = (struct net *)m->private;
2834         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2835         return 0;
2836 }
2837
2838 static int ipv6_route_open(struct inode *inode, struct file *file)
2839 {
2840         return single_open_net(inode, file, ipv6_route_show);
2841 }
2842
2843 static const struct file_operations ipv6_route_proc_fops = {
2844         .owner          = THIS_MODULE,
2845         .open           = ipv6_route_open,
2846         .read           = seq_read,
2847         .llseek         = seq_lseek,
2848         .release        = single_release_net,
2849 };
2850
2851 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2852 {
2853         struct net *net = (struct net *)seq->private;
2854         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2855                    net->ipv6.rt6_stats->fib_nodes,
2856                    net->ipv6.rt6_stats->fib_route_nodes,
2857                    net->ipv6.rt6_stats->fib_rt_alloc,
2858                    net->ipv6.rt6_stats->fib_rt_entries,
2859                    net->ipv6.rt6_stats->fib_rt_cache,
2860                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2861                    net->ipv6.rt6_stats->fib_discarded_routes);
2862
2863         return 0;
2864 }
2865
2866 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2867 {
2868         return single_open_net(inode, file, rt6_stats_seq_show);
2869 }
2870
2871 static const struct file_operations rt6_stats_seq_fops = {
2872         .owner   = THIS_MODULE,
2873         .open    = rt6_stats_seq_open,
2874         .read    = seq_read,
2875         .llseek  = seq_lseek,
2876         .release = single_release_net,
2877 };
2878 #endif  /* CONFIG_PROC_FS */
2879
2880 #ifdef CONFIG_SYSCTL
2881
2882 static
2883 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2884                               void __user *buffer, size_t *lenp, loff_t *ppos)
2885 {
2886         struct net *net;
2887         int delay;
2888         if (!write)
2889                 return -EINVAL;
2890
2891         net = (struct net *)ctl->extra1;
2892         delay = net->ipv6.sysctl.flush_delay;
2893         proc_dointvec(ctl, write, buffer, lenp, ppos);
2894         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2895         return 0;
2896 }
2897
2898 ctl_table ipv6_route_table_template[] = {
2899         {
2900                 .procname       =       "flush",
2901                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2902                 .maxlen         =       sizeof(int),
2903                 .mode           =       0200,
2904                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2905         },
2906         {
2907                 .procname       =       "gc_thresh",
2908                 .data           =       &ip6_dst_ops_template.gc_thresh,
2909                 .maxlen         =       sizeof(int),
2910                 .mode           =       0644,
2911                 .proc_handler   =       proc_dointvec,
2912         },
2913         {
2914                 .procname       =       "max_size",
2915                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2916                 .maxlen         =       sizeof(int),
2917                 .mode           =       0644,
2918                 .proc_handler   =       proc_dointvec,
2919         },
2920         {
2921                 .procname       =       "gc_min_interval",
2922                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2923                 .maxlen         =       sizeof(int),
2924                 .mode           =       0644,
2925                 .proc_handler   =       proc_dointvec_jiffies,
2926         },
2927         {
2928                 .procname       =       "gc_timeout",
2929                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2930                 .maxlen         =       sizeof(int),
2931                 .mode           =       0644,
2932                 .proc_handler   =       proc_dointvec_jiffies,
2933         },
2934         {
2935                 .procname       =       "gc_interval",
2936                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2937                 .maxlen         =       sizeof(int),
2938                 .mode           =       0644,
2939                 .proc_handler   =       proc_dointvec_jiffies,
2940         },
2941         {
2942                 .procname       =       "gc_elasticity",
2943                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2944                 .maxlen         =       sizeof(int),
2945                 .mode           =       0644,
2946                 .proc_handler   =       proc_dointvec,
2947         },
2948         {
2949                 .procname       =       "mtu_expires",
2950                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2951                 .maxlen         =       sizeof(int),
2952                 .mode           =       0644,
2953                 .proc_handler   =       proc_dointvec_jiffies,
2954         },
2955         {
2956                 .procname       =       "min_adv_mss",
2957                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2958                 .maxlen         =       sizeof(int),
2959                 .mode           =       0644,
2960                 .proc_handler   =       proc_dointvec,
2961         },
2962         {
2963                 .procname       =       "gc_min_interval_ms",
2964                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2965                 .maxlen         =       sizeof(int),
2966                 .mode           =       0644,
2967                 .proc_handler   =       proc_dointvec_ms_jiffies,
2968         },
2969         { }
2970 };
2971
2972 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2973 {
2974         struct ctl_table *table;
2975
2976         table = kmemdup(ipv6_route_table_template,
2977                         sizeof(ipv6_route_table_template),
2978                         GFP_KERNEL);
2979
2980         if (table) {
2981                 table[0].data = &net->ipv6.sysctl.flush_delay;
2982                 table[0].extra1 = net;
2983                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2984                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2985                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2986                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2987                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2988                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2989                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2990                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2991                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2992
2993                 /* Don't export sysctls to unprivileged users */
2994                 if (net->user_ns != &init_user_ns)
2995                         table[0].procname = NULL;
2996         }
2997
2998         return table;
2999 }
3000 #endif
3001
3002 static int __net_init ip6_route_net_init(struct net *net)
3003 {
3004         int ret = -ENOMEM;
3005
3006         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3007                sizeof(net->ipv6.ip6_dst_ops));
3008
3009         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3010                 goto out_ip6_dst_ops;
3011
3012         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3013                                            sizeof(*net->ipv6.ip6_null_entry),
3014                                            GFP_KERNEL);
3015         if (!net->ipv6.ip6_null_entry)
3016                 goto out_ip6_dst_entries;
3017         net->ipv6.ip6_null_entry->dst.path =
3018                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3019         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3020         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3021                          ip6_template_metrics, true);
3022
3023 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3024         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3025                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3026                                                GFP_KERNEL);
3027         if (!net->ipv6.ip6_prohibit_entry)
3028                 goto out_ip6_null_entry;
3029         net->ipv6.ip6_prohibit_entry->dst.path =
3030                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3031         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3032         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3033                          ip6_template_metrics, true);
3034
3035         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3036                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3037                                                GFP_KERNEL);
3038         if (!net->ipv6.ip6_blk_hole_entry)
3039                 goto out_ip6_prohibit_entry;
3040         net->ipv6.ip6_blk_hole_entry->dst.path =
3041                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3042         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3043         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3044                          ip6_template_metrics, true);
3045 #endif
3046
3047         net->ipv6.sysctl.flush_delay = 0;
3048         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3049         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3050         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3051         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3052         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3053         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3054         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3055
3056         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3057
3058         ret = 0;
3059 out:
3060         return ret;
3061
3062 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3063 out_ip6_prohibit_entry:
3064         kfree(net->ipv6.ip6_prohibit_entry);
3065 out_ip6_null_entry:
3066         kfree(net->ipv6.ip6_null_entry);
3067 #endif
3068 out_ip6_dst_entries:
3069         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3070 out_ip6_dst_ops:
3071         goto out;
3072 }
3073
3074 static void __net_exit ip6_route_net_exit(struct net *net)
3075 {
3076         kfree(net->ipv6.ip6_null_entry);
3077 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3078         kfree(net->ipv6.ip6_prohibit_entry);
3079         kfree(net->ipv6.ip6_blk_hole_entry);
3080 #endif
3081         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3082 }
3083
3084 static int __net_init ip6_route_net_init_late(struct net *net)
3085 {
3086 #ifdef CONFIG_PROC_FS
3087         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3088         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3089 #endif
3090         return 0;
3091 }
3092
3093 static void __net_exit ip6_route_net_exit_late(struct net *net)
3094 {
3095 #ifdef CONFIG_PROC_FS
3096         proc_net_remove(net, "ipv6_route");
3097         proc_net_remove(net, "rt6_stats");
3098 #endif
3099 }
3100
3101 static struct pernet_operations ip6_route_net_ops = {
3102         .init = ip6_route_net_init,
3103         .exit = ip6_route_net_exit,
3104 };
3105
3106 static int __net_init ipv6_inetpeer_init(struct net *net)
3107 {
3108         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3109
3110         if (!bp)
3111                 return -ENOMEM;
3112         inet_peer_base_init(bp);
3113         net->ipv6.peers = bp;
3114         return 0;
3115 }
3116
3117 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3118 {
3119         struct inet_peer_base *bp = net->ipv6.peers;
3120
3121         net->ipv6.peers = NULL;
3122         inetpeer_invalidate_tree(bp);
3123         kfree(bp);
3124 }
3125
3126 static struct pernet_operations ipv6_inetpeer_ops = {
3127         .init   =       ipv6_inetpeer_init,
3128         .exit   =       ipv6_inetpeer_exit,
3129 };
3130
3131 static struct pernet_operations ip6_route_net_late_ops = {
3132         .init = ip6_route_net_init_late,
3133         .exit = ip6_route_net_exit_late,
3134 };
3135
3136 static struct notifier_block ip6_route_dev_notifier = {
3137         .notifier_call = ip6_route_dev_notify,
3138         .priority = 0,
3139 };
3140
3141 int __init ip6_route_init(void)
3142 {
3143         int ret;
3144
3145         ret = -ENOMEM;
3146         ip6_dst_ops_template.kmem_cachep =
3147                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3148                                   SLAB_HWCACHE_ALIGN, NULL);
3149         if (!ip6_dst_ops_template.kmem_cachep)
3150                 goto out;
3151
3152         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3153         if (ret)
3154                 goto out_kmem_cache;
3155
3156         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3157         if (ret)
3158                 goto out_dst_entries;
3159
3160         ret = register_pernet_subsys(&ip6_route_net_ops);
3161         if (ret)
3162                 goto out_register_inetpeer;
3163
3164         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3165
3166         /* Registering of the loopback is done before this portion of code,
3167          * the loopback reference in rt6_info will not be taken, do it
3168          * manually for init_net */
3169         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3170         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3171   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3172         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3173         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3174         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3175         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3176   #endif
3177         ret = fib6_init();
3178         if (ret)
3179                 goto out_register_subsys;
3180
3181         ret = xfrm6_init();
3182         if (ret)
3183                 goto out_fib6_init;
3184
3185         ret = fib6_rules_init();
3186         if (ret)
3187                 goto xfrm6_init;
3188
3189         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3190         if (ret)
3191                 goto fib6_rules_init;
3192
3193         ret = -ENOBUFS;
3194         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3195             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3196             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3197                 goto out_register_late_subsys;
3198
3199         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3200         if (ret)
3201                 goto out_register_late_subsys;
3202
3203 out:
3204         return ret;
3205
3206 out_register_late_subsys:
3207         unregister_pernet_subsys(&ip6_route_net_late_ops);
3208 fib6_rules_init:
3209         fib6_rules_cleanup();
3210 xfrm6_init:
3211         xfrm6_fini();
3212 out_fib6_init:
3213         fib6_gc_cleanup();
3214 out_register_subsys:
3215         unregister_pernet_subsys(&ip6_route_net_ops);
3216 out_register_inetpeer:
3217         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3218 out_dst_entries:
3219         dst_entries_destroy(&ip6_dst_blackhole_ops);
3220 out_kmem_cache:
3221         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3222         goto out;
3223 }
3224
3225 void ip6_route_cleanup(void)
3226 {
3227         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3228         unregister_pernet_subsys(&ip6_route_net_late_ops);
3229         fib6_rules_cleanup();
3230         xfrm6_fini();
3231         fib6_gc_cleanup();
3232         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3233         unregister_pernet_subsys(&ip6_route_net_ops);
3234         dst_entries_destroy(&ip6_dst_blackhole_ops);
3235         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3236 }