netfilter: add nf_ipv6_ops hook to fix xt_addrtype with IPv6
[linux-3.10.git] / net / ipv6 / route.c
index 0aefc36..ad0aa6b 100644 (file)
@@ -24,6 +24,8 @@
  *             Fixed routing subtrees.
  */
 
+#define pr_fmt(fmt) "IPv6: " fmt
+
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/export.h>
@@ -55,6 +57,7 @@
 #include <net/xfrm.h>
 #include <net/netevent.h>
 #include <net/netlink.h>
+#include <net/nexthop.h>
 
 #include <asm/uaccess.h>
 
@@ -76,7 +79,10 @@ static int            ip6_dst_gc(struct dst_ops *ops);
 static int             ip6_pkt_discard(struct sk_buff *skb);
 static int             ip6_pkt_discard_out(struct sk_buff *skb);
 static void            ip6_link_failure(struct sk_buff *skb);
-static void            ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+static void            ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+                                          struct sk_buff *skb, u32 mtu);
+static void            rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
+                                       struct sk_buff *skb);
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -97,10 +103,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
        if (!(rt->dst.flags & DST_HOST))
                return NULL;
 
-       if (!rt->rt6i_peer)
-               rt6_bind_peer(rt, 1);
-
-       peer = rt->rt6i_peer;
+       peer = rt6_get_peer_create(rt);
        if (peer) {
                u32 *old_p = __DST_METRICS_PTR(old);
                unsigned long prev, new;
@@ -121,40 +124,33 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
        return p;
 }
 
-static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
+static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+                                            struct sk_buff *skb,
+                                            const void *daddr)
 {
        struct in6_addr *p = &rt->rt6i_gateway;
 
        if (!ipv6_addr_any(p))
                return (const void *) p;
+       else if (skb)
+               return &ipv6_hdr(skb)->daddr;
        return daddr;
 }
 
-static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
+                                         struct sk_buff *skb,
+                                         const void *daddr)
 {
        struct rt6_info *rt = (struct rt6_info *) dst;
        struct neighbour *n;
 
-       daddr = choose_neigh_daddr(rt, daddr);
-       n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
+       daddr = choose_neigh_daddr(rt, skb, daddr);
+       n = __ipv6_neigh_lookup(dst->dev, daddr);
        if (n)
                return n;
        return neigh_create(&nd_tbl, daddr, dst->dev);
 }
 
-static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
-{
-       struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
-       if (!n) {
-               n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
-               if (IS_ERR(n))
-                       return PTR_ERR(n);
-       }
-       dst_set_neighbour(&rt->dst, n);
-
-       return 0;
-}
-
 static struct dst_ops ip6_dst_ops_template = {
        .family                 =       AF_INET6,
        .protocol               =       cpu_to_be16(ETH_P_IPV6),
@@ -169,6 +165,7 @@ static struct dst_ops ip6_dst_ops_template = {
        .negative_advice        =       ip6_negative_advice,
        .link_failure           =       ip6_link_failure,
        .update_pmtu            =       ip6_rt_update_pmtu,
+       .redirect               =       rt6_do_redirect,
        .local_out              =       __ip6_local_out,
        .neigh_lookup           =       ip6_neigh_lookup,
 };
@@ -180,7 +177,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
        return mtu ? : dst->dev->mtu;
 }
 
-static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+                                        struct sk_buff *skb, u32 mtu)
+{
+}
+
+static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+                                     struct sk_buff *skb)
 {
 }
 
@@ -198,19 +201,20 @@ static struct dst_ops ip6_dst_blackhole_ops = {
        .mtu                    =       ip6_blackhole_mtu,
        .default_advmss         =       ip6_default_advmss,
        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
+       .redirect               =       ip6_rt_blackhole_redirect,
        .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
        .neigh_lookup           =       ip6_neigh_lookup,
 };
 
 static const u32 ip6_template_metrics[RTAX_MAX] = {
-       [RTAX_HOPLIMIT - 1] = 255,
+       [RTAX_HOPLIMIT - 1] = 0,
 };
 
-static struct rt6_info ip6_null_entry_template = {
+static const struct rt6_info ip6_null_entry_template = {
        .dst = {
                .__refcnt       = ATOMIC_INIT(1),
                .__use          = 1,
-               .obsolete       = -1,
+               .obsolete       = DST_OBSOLETE_FORCE_CHK,
                .error          = -ENETUNREACH,
                .input          = ip6_pkt_discard,
                .output         = ip6_pkt_discard_out,
@@ -226,11 +230,11 @@ static struct rt6_info ip6_null_entry_template = {
 static int ip6_pkt_prohibit(struct sk_buff *skb);
 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 
-static struct rt6_info ip6_prohibit_entry_template = {
+static const struct rt6_info ip6_prohibit_entry_template = {
        .dst = {
                .__refcnt       = ATOMIC_INIT(1),
                .__use          = 1,
-               .obsolete       = -1,
+               .obsolete       = DST_OBSOLETE_FORCE_CHK,
                .error          = -EACCES,
                .input          = ip6_pkt_prohibit,
                .output         = ip6_pkt_prohibit_out,
@@ -241,11 +245,11 @@ static struct rt6_info ip6_prohibit_entry_template = {
        .rt6i_ref       = ATOMIC_INIT(1),
 };
 
-static struct rt6_info ip6_blk_hole_entry_template = {
+static const struct rt6_info ip6_blk_hole_entry_template = {
        .dst = {
                .__refcnt       = ATOMIC_INIT(1),
                .__use          = 1,
-               .obsolete       = -1,
+               .obsolete       = DST_OBSOLETE_FORCE_CHK,
                .error          = -EINVAL,
                .input          = dst_discard,
                .output         = dst_discard,
@@ -259,16 +263,23 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 #endif
 
 /* allocate dst with ip6_dst_ops */
-static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
+static inline struct rt6_info *ip6_dst_alloc(struct net *net,
                                             struct net_device *dev,
-                                            int flags)
+                                            int flags,
+                                            struct fib6_table *table)
 {
-       struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
+       struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
+                                       0, DST_OBSOLETE_FORCE_CHK, flags);
 
-       if (rt)
-               memset(&rt->rt6i_table, 0,
-                      sizeof(*rt) - sizeof(struct dst_entry));
+       if (rt) {
+               struct dst_entry *dst = &rt->dst;
 
+               memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
+               rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+               rt->rt6i_genid = rt_genid(net);
+               INIT_LIST_HEAD(&rt->rt6i_siblings);
+               rt->rt6i_nsiblings = 0;
+       }
        return rt;
 }
 
@@ -276,7 +287,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 {
        struct rt6_info *rt = (struct rt6_info *)dst;
        struct inet6_dev *idev = rt->rt6i_idev;
-       struct inet_peer *peer = rt->rt6i_peer;
+       struct dst_entry *from = dst->from;
 
        if (!(rt->dst.flags & DST_HOST))
                dst_destroy_metrics_generic(dst);
@@ -286,31 +297,29 @@ static void ip6_dst_destroy(struct dst_entry *dst)
                in6_dev_put(idev);
        }
 
-       if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
-               dst_release(dst->from);
+       dst->from = NULL;
+       dst_release(from);
 
-       if (peer) {
-               rt->rt6i_peer = NULL;
+       if (rt6_has_peer(rt)) {
+               struct inet_peer *peer = rt6_peer_ptr(rt);
                inet_putpeer(peer);
        }
 }
 
-static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
-
-static u32 rt6_peer_genid(void)
-{
-       return atomic_read(&__rt6_peer_genid);
-}
-
 void rt6_bind_peer(struct rt6_info *rt, int create)
 {
+       struct inet_peer_base *base;
        struct inet_peer *peer;
 
-       peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
-       if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
-               inet_putpeer(peer);
-       else
-               rt->rt6i_peer_genid = rt6_peer_genid();
+       base = inetpeer_base_ptr(rt->_rt6i_peer);
+       if (!base)
+               return;
+
+       peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
+       if (peer) {
+               if (!rt6_set_peer(rt, peer))
+                       inet_putpeer(peer);
+       }
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -321,37 +330,91 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
        struct net_device *loopback_dev =
                dev_net(dev)->loopback_dev;
 
-       if (dev != loopback_dev && idev && idev->dev == dev) {
-               struct inet6_dev *loopback_idev =
-                       in6_dev_get(loopback_dev);
-               if (loopback_idev) {
-                       rt->rt6i_idev = loopback_idev;
-                       in6_dev_put(idev);
+       if (dev != loopback_dev) {
+               if (idev && idev->dev == dev) {
+                       struct inet6_dev *loopback_idev =
+                               in6_dev_get(loopback_dev);
+                       if (loopback_idev) {
+                               rt->rt6i_idev = loopback_idev;
+                               in6_dev_put(idev);
+                       }
                }
        }
 }
 
-static __inline__ int rt6_check_expired(const struct rt6_info *rt)
+static bool rt6_check_expired(const struct rt6_info *rt)
 {
-       struct rt6_info *ort = NULL;
-
        if (rt->rt6i_flags & RTF_EXPIRES) {
                if (time_after(jiffies, rt->dst.expires))
-                       return 1;
+                       return true;
        } else if (rt->dst.from) {
-               ort = (struct rt6_info *) rt->dst.from;
-               return (ort->rt6i_flags & RTF_EXPIRES) &&
-                       time_after(jiffies, ort->dst.expires);
+               return rt6_check_expired((struct rt6_info *) rt->dst.from);
        }
-       return 0;
+       return false;
 }
 
-static inline int rt6_need_strict(const struct in6_addr *daddr)
+static bool rt6_need_strict(const struct in6_addr *daddr)
 {
        return ipv6_addr_type(daddr) &
                (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 }
 
+/* Multipath route selection:
+ *   Hash based function using packet header and flowlabel.
+ * Adapted from fib_info_hashfn()
+ */
+static int rt6_info_hash_nhsfn(unsigned int candidate_count,
+                              const struct flowi6 *fl6)
+{
+       unsigned int val = fl6->flowi6_proto;
+
+       val ^= ipv6_addr_hash(&fl6->daddr);
+       val ^= ipv6_addr_hash(&fl6->saddr);
+
+       /* Works only if this is not encapsulated */
+       switch (fl6->flowi6_proto) {
+       case IPPROTO_UDP:
+       case IPPROTO_TCP:
+       case IPPROTO_SCTP:
+               val ^= (__force u16)fl6->fl6_sport;
+               val ^= (__force u16)fl6->fl6_dport;
+               break;
+
+       case IPPROTO_ICMPV6:
+               val ^= (__force u16)fl6->fl6_icmp_type;
+               val ^= (__force u16)fl6->fl6_icmp_code;
+               break;
+       }
+       /* RFC6438 recommends using the flow label */
+       val ^= (__force u32)fl6->flowlabel;
+
+       /* Perhaps we need to tune this function? */
+       val = val ^ (val >> 7) ^ (val >> 12);
+       return val % candidate_count;
+}
+
+static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+                                            struct flowi6 *fl6)
+{
+       struct rt6_info *sibling, *next_sibling;
+       int route_choosen;
+
+       route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
+       /* Don't change the route if route_choosen == 0
+        * (the siblings list does not include ourselves)
+        */
+       if (route_choosen)
+               list_for_each_entry_safe(sibling, next_sibling,
+                               &match->rt6i_siblings, rt6i_siblings) {
+                       route_choosen--;
+                       if (route_choosen == 0) {
+                               match = sibling;
+                               break;
+                       }
+               }
+       return match;
+}
+
 /*
  *     Route lookup. Any table->tb6_lock is implied.
  */
@@ -415,27 +478,34 @@ static void rt6_probe(struct rt6_info *rt)
         * Router Reachability Probe MUST be rate-limited
         * to no more than one per minute.
         */
-       rcu_read_lock();
-       neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
-       if (!neigh || (neigh->nud_state & NUD_VALID))
-               goto out;
-       read_lock_bh(&neigh->lock);
-       if (!(neigh->nud_state & NUD_VALID) &&
+       if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
+               return;
+       rcu_read_lock_bh();
+       neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+       if (neigh) {
+               write_lock(&neigh->lock);
+               if (neigh->nud_state & NUD_VALID)
+                       goto out;
+       }
+
+       if (!neigh ||
            time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
                struct in6_addr mcaddr;
                struct in6_addr *target;
 
-               neigh->updated = jiffies;
-               read_unlock_bh(&neigh->lock);
+               if (neigh) {
+                       neigh->updated = jiffies;
+                       write_unlock(&neigh->lock);
+               }
 
-               target = (struct in6_addr *)&neigh->primary_key;
+               target = (struct in6_addr *)&rt->rt6i_gateway;
                addrconf_addr_solict_mult(target, &mcaddr);
                ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
        } else {
-               read_unlock_bh(&neigh->lock);
-       }
 out:
-       rcu_read_unlock();
+               write_unlock(&neigh->lock);
+       }
+       rcu_read_unlock_bh();
 }
 #else
 static inline void rt6_probe(struct rt6_info *rt)
@@ -457,37 +527,36 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
        return 0;
 }
 
-static inline int rt6_check_neigh(struct rt6_info *rt)
+static inline bool rt6_check_neigh(struct rt6_info *rt)
 {
        struct neighbour *neigh;
-       int m;
+       bool ret = false;
 
-       rcu_read_lock();
-       neigh = dst_get_neighbour_noref(&rt->dst);
        if (rt->rt6i_flags & RTF_NONEXTHOP ||
            !(rt->rt6i_flags & RTF_GATEWAY))
-               m = 1;
-       else if (neigh) {
-               read_lock_bh(&neigh->lock);
+               return true;
+
+       rcu_read_lock_bh();
+       neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
+       if (neigh) {
+               read_lock(&neigh->lock);
                if (neigh->nud_state & NUD_VALID)
-                       m = 2;
+                       ret = true;
 #ifdef CONFIG_IPV6_ROUTER_PREF
-               else if (neigh->nud_state & NUD_FAILED)
-                       m = 0;
+               else if (!(neigh->nud_state & NUD_FAILED))
+                       ret = true;
 #endif
-               else
-                       m = 1;
-               read_unlock_bh(&neigh->lock);
-       } else
-               m = 0;
-       rcu_read_unlock();
-       return m;
+               read_unlock(&neigh->lock);
+       }
+       rcu_read_unlock_bh();
+
+       return ret;
 }
 
 static int rt6_score_route(struct rt6_info *rt, int oif,
                           int strict)
 {
-       int m, n;
+       int m;
 
        m = rt6_check_dev(rt, oif);
        if (!m && (strict & RT6_LOOKUP_F_IFACE))
@@ -495,8 +564,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 #ifdef CONFIG_IPV6_ROUTER_PREF
        m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 #endif
-       n = rt6_check_neigh(rt);
-       if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
+       if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
                return -1;
        return m;
 }
@@ -638,7 +706,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                else
                        rt6_set_expires(rt, jiffies + HZ * lifetime);
 
-               dst_release(&rt->dst);
+               ip6_rt_put(rt);
        }
        return 0;
 }
@@ -674,6 +742,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 restart:
        rt = fn->leaf;
        rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
+       if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+               rt = rt6_multipath_select(rt, fl6);
        BACKTRACK(net, &fl6->saddr);
 out:
        dst_use(&rt->dst, jiffies);
@@ -755,8 +825,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
        rt = ip6_rt_copy(ort, daddr);
 
        if (rt) {
-               int attempts = !in_softirq();
-
                if (!(rt->rt6i_flags & RTF_GATEWAY)) {
                        if (ort->rt6i_dst.plen != 128 &&
                            ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
@@ -772,34 +840,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
                        rt->rt6i_src.plen = 128;
                }
 #endif
-
-       retry:
-               if (rt6_bind_neighbour(rt, rt->dst.dev)) {
-                       struct net *net = dev_net(rt->dst.dev);
-                       int saved_rt_min_interval =
-                               net->ipv6.sysctl.ip6_rt_gc_min_interval;
-                       int saved_rt_elasticity =
-                               net->ipv6.sysctl.ip6_rt_gc_elasticity;
-
-                       if (attempts-- > 0) {
-                               net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
-                               net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
-
-                               ip6_dst_gc(&net->ipv6.ip6_dst_ops);
-
-                               net->ipv6.sysctl.ip6_rt_gc_elasticity =
-                                       saved_rt_elasticity;
-                               net->ipv6.sysctl.ip6_rt_gc_min_interval =
-                                       saved_rt_min_interval;
-                               goto retry;
-                       }
-
-                       if (net_ratelimit())
-                               printk(KERN_WARNING
-                                      "ipv6: Neighbour table overflow.\n");
-                       dst_free(&rt->dst);
-                       return NULL;
-               }
        }
 
        return rt;
@@ -810,10 +850,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 {
        struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 
-       if (rt) {
+       if (rt)
                rt->rt6i_flags |= RTF_CACHE;
-               dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
-       }
        return rt;
 }
 
@@ -837,7 +875,8 @@ restart_2:
 
 restart:
        rt = rt6_select(fn, oif, strict | reachable);
-
+       if (rt->rt6i_nsiblings && oif == 0)
+               rt = rt6_multipath_select(rt, fl6);
        BACKTRACK(net, &fl6->saddr);
        if (rt == net->ipv6.ip6_null_entry ||
            rt->rt6i_flags & RTF_CACHE)
@@ -846,14 +885,14 @@ restart:
        dst_hold(&rt->dst);
        read_unlock_bh(&table->tb6_lock);
 
-       if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+       if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
                nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
        else if (!(rt->dst.flags & DST_HOST))
                nrt = rt6_alloc_clone(rt, &fl6->daddr);
        else
                goto out2;
 
-       dst_release(&rt->dst);
+       ip6_rt_put(rt);
        rt = nrt ? : net->ipv6.ip6_null_entry;
 
        dst_hold(&rt->dst);
@@ -870,7 +909,7 @@ restart:
         * Race condition! In the gap, when table->tb6_lock was
         * released someone could insert this route.  Relookup.
         */
-       dst_release(&rt->dst);
+       ip6_rt_put(rt);
        goto relookup;
 
 out:
@@ -912,7 +951,7 @@ void ip6_route_input(struct sk_buff *skb)
                .flowi6_iif = skb->dev->ifindex,
                .daddr = iph->daddr,
                .saddr = iph->saddr,
-               .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
+               .flowlabel = ip6_flowinfo(iph),
                .flowi6_mark = skb->mark,
                .flowi6_proto = iph->nexthdr,
        };
@@ -931,6 +970,8 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 {
        int flags = 0;
 
+       fl6->flowi6_iif = LOOPBACK_IFINDEX;
+
        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
                flags |= RT6_LOOKUP_F_IFACE;
 
@@ -949,12 +990,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
        struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
        struct dst_entry *new = NULL;
 
-       rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
+       rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
        if (rt) {
-               memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
-
                new = &rt->dst;
 
+               memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
+               rt6_init_peer(rt, net->ipv6.peers);
+
                new->__use = 1;
                new->input = dst_discard;
                new->output = dst_discard;
@@ -969,7 +1011,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 
                rt->rt6i_gateway = ort->rt6i_gateway;
                rt->rt6i_flags = ort->rt6i_flags;
-               rt6_clean_expires(rt);
                rt->rt6i_metric = 0;
 
                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
@@ -994,14 +1035,16 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
        rt = (struct rt6_info *) dst;
 
-       if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
-               if (rt->rt6i_peer_genid != rt6_peer_genid()) {
-                       if (!rt->rt6i_peer)
-                               rt6_bind_peer(rt, 0);
-                       rt->rt6i_peer_genid = rt6_peer_genid();
-               }
+       /* All IPV6 dsts are created with ->obsolete set to the value
+        * DST_OBSOLETE_FORCE_CHK which forces validation calls down
+        * into this function always.
+        */
+       if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
+               return NULL;
+
+       if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
                return dst;
-       }
+
        return NULL;
 }
 
@@ -1038,11 +1081,15 @@ static void ip6_link_failure(struct sk_buff *skb)
        }
 }
 
-static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+                              struct sk_buff *skb, u32 mtu)
 {
        struct rt6_info *rt6 = (struct rt6_info*)dst;
 
+       dst_confirm(dst);
        if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+               struct net *net = dev_net(dst->dev);
+
                rt6->rt6i_flags |= RTF_MODIFIED;
                if (mtu < IPV6_MIN_MTU) {
                        u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1051,9 +1098,66 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
                        dst_metric_set(dst, RTAX_FEATURES, features);
                }
                dst_metric_set(dst, RTAX_MTU, mtu);
+               rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
        }
 }
 
+void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
+                    int oif, u32 mark)
+{
+       const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+       struct dst_entry *dst;
+       struct flowi6 fl6;
+
+       memset(&fl6, 0, sizeof(fl6));
+       fl6.flowi6_oif = oif;
+       fl6.flowi6_mark = mark;
+       fl6.flowi6_flags = 0;
+       fl6.daddr = iph->daddr;
+       fl6.saddr = iph->saddr;
+       fl6.flowlabel = ip6_flowinfo(iph);
+
+       dst = ip6_route_output(net, NULL, &fl6);
+       if (!dst->error)
+               ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
+       dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+
+void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
+{
+       ip6_update_pmtu(skb, sock_net(sk), mtu,
+                       sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+
+void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
+{
+       const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
+       struct dst_entry *dst;
+       struct flowi6 fl6;
+
+       memset(&fl6, 0, sizeof(fl6));
+       fl6.flowi6_oif = oif;
+       fl6.flowi6_mark = mark;
+       fl6.flowi6_flags = 0;
+       fl6.daddr = iph->daddr;
+       fl6.saddr = iph->saddr;
+       fl6.flowlabel = ip6_flowinfo(iph);
+
+       dst = ip6_route_output(net, NULL, &fl6);
+       if (!dst->error)
+               rt6_do_redirect(dst, NULL, skb);
+       dst_release(dst);
+}
+EXPORT_SYMBOL_GPL(ip6_redirect);
+
+void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
+{
+       ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
+}
+EXPORT_SYMBOL_GPL(ip6_sk_redirect);
+
 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 {
        struct net_device *dev = dst->dev;
@@ -1099,7 +1203,6 @@ static struct dst_entry *icmp6_dst_gc_list;
 static DEFINE_SPINLOCK(icmp6_dst_lock);
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
-                                 struct neighbour *neigh,
                                  struct flowi6 *fl6)
 {
        struct dst_entry *dst;
@@ -1110,32 +1213,20 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
        if (unlikely(!idev))
                return ERR_PTR(-ENODEV);
 
-       rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
+       rt = ip6_dst_alloc(net, dev, 0, NULL);
        if (unlikely(!rt)) {
                in6_dev_put(idev);
                dst = ERR_PTR(-ENOMEM);
                goto out;
        }
 
-       if (neigh)
-               neigh_hold(neigh);
-       else {
-               neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
-               if (IS_ERR(neigh)) {
-                       in6_dev_put(idev);
-                       dst_free(&rt->dst);
-                       return ERR_CAST(neigh);
-               }
-       }
-
        rt->dst.flags |= DST_HOST;
        rt->dst.output  = ip6_output;
-       dst_set_neighbour(&rt->dst, neigh);
        atomic_set(&rt->dst.__refcnt, 1);
        rt->rt6i_dst.addr = fl6->daddr;
        rt->rt6i_dst.plen = 128;
        rt->rt6i_idev     = idev;
-       dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
+       dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
 
        spin_lock_bh(&icmp6_dst_lock);
        rt->dst.next = icmp6_dst_gc_list;
@@ -1219,12 +1310,6 @@ out:
        return entries > rt_max_size;
 }
 
-/* Clean host part of a prefix. Not necessary in radix tree,
-   but results in cleaner routing tables.
-
-   Remove it only when all the things will work!
- */
-
 int ip6_dst_hoplimit(struct dst_entry *dst)
 {
        int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
@@ -1282,7 +1367,7 @@ int ip6_route_add(struct fib6_config *cfg)
            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
                table = fib6_get_table(net, cfg->fc_table);
                if (!table) {
-                       printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
+                       pr_warn("NLM_F_CREATE should be specified when creating new route\n");
                        table = fib6_new_table(net, cfg->fc_table);
                }
        } else {
@@ -1292,15 +1377,13 @@ int ip6_route_add(struct fib6_config *cfg)
        if (!table)
                goto out;
 
-       rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
+       rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
 
        if (!rt) {
                err = -ENOMEM;
                goto out;
        }
 
-       rt->dst.obsolete = -1;
-
        if (cfg->fc_flags & RTF_EXPIRES)
                rt6_set_expires(rt, jiffies +
                                clock_t_to_jiffies(cfg->fc_expires));
@@ -1365,8 +1448,21 @@ int ip6_route_add(struct fib6_config *cfg)
                }
                rt->dst.output = ip6_pkt_discard_out;
                rt->dst.input = ip6_pkt_discard;
-               rt->dst.error = -ENETUNREACH;
                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
+               switch (cfg->fc_type) {
+               case RTN_BLACKHOLE:
+                       rt->dst.error = -EINVAL;
+                       break;
+               case RTN_PROHIBIT:
+                       rt->dst.error = -EACCES;
+                       break;
+               case RTN_THROW:
+                       rt->dst.error = -EAGAIN;
+                       break;
+               default:
+                       rt->dst.error = -ENETUNREACH;
+                       break;
+               }
                goto install_route;
        }
 
@@ -1399,7 +1495,7 @@ int ip6_route_add(struct fib6_config *cfg)
                                goto out;
                        if (dev) {
                                if (dev != grt->dst.dev) {
-                                       dst_release(&grt->dst);
+                                       ip6_rt_put(grt);
                                        goto out;
                                }
                        } else {
@@ -1410,7 +1506,7 @@ int ip6_route_add(struct fib6_config *cfg)
                        }
                        if (!(grt->rt6i_flags & RTF_GATEWAY))
                                err = 0;
-                       dst_release(&grt->dst);
+                       ip6_rt_put(grt);
 
                        if (err)
                                goto out;
@@ -1434,12 +1530,6 @@ int ip6_route_add(struct fib6_config *cfg)
        } else
                rt->rt6i_prefsrc.plen = 0;
 
-       if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
-               err = rt6_bind_neighbour(rt, dev);
-               if (err)
-                       goto out;
-       }
-
        rt->rt6i_flags = cfg->fc_flags;
 
 install_route:
@@ -1485,17 +1575,18 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
        struct fib6_table *table;
        struct net *net = dev_net(rt->dst.dev);
 
-       if (rt == net->ipv6.ip6_null_entry)
-               return -ENOENT;
+       if (rt == net->ipv6.ip6_null_entry) {
+               err = -ENOENT;
+               goto out;
+       }
 
        table = rt->rt6i_table;
        write_lock_bh(&table->tb6_lock);
-
        err = fib6_del(rt, info);
-       dst_release(&rt->dst);
-
        write_unlock_bh(&table->tb6_lock);
 
+out:
+       ip6_rt_put(rt);
        return err;
 }
 
@@ -1546,109 +1637,84 @@ static int ip6_route_del(struct fib6_config *cfg)
        return err;
 }
 
-/*
- *     Handle redirects
- */
-struct ip6rd_flowi {
-       struct flowi6 fl6;
-       struct in6_addr gateway;
-};
-
-static struct rt6_info *__ip6_route_redirect(struct net *net,
-                                            struct fib6_table *table,
-                                            struct flowi6 *fl6,
-                                            int flags)
+static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
 {
-       struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
-       struct rt6_info *rt;
-       struct fib6_node *fn;
+       struct net *net = dev_net(skb->dev);
+       struct netevent_redirect netevent;
+       struct rt6_info *rt, *nrt = NULL;
+       struct ndisc_options ndopts;
+       struct inet6_dev *in6_dev;
+       struct neighbour *neigh;
+       struct rd_msg *msg;
+       int optlen, on_link;
+       u8 *lladdr;
 
-       /*
-        * Get the "current" route for this destination and
-        * check if the redirect has come from approriate router.
-        *
-        * RFC 2461 specifies that redirects should only be
-        * accepted if they come from the nexthop to the target.
-        * Due to the way the routes are chosen, this notion
-        * is a bit fuzzy and one might need to check all possible
-        * routes.
-        */
+       optlen = skb->tail - skb->transport_header;
+       optlen -= sizeof(*msg);
 
-       read_lock_bh(&table->tb6_lock);
-       fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
-restart:
-       for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
-               /*
-                * Current route is on-link; redirect is always invalid.
-                *
-                * Seems, previous statement is not true. It could
-                * be node, which looks for us as on-link (f.e. proxy ndisc)
-                * But then router serving it might decide, that we should
-                * know truth 8)8) --ANK (980726).
-                */
-               if (rt6_check_expired(rt))
-                       continue;
-               if (!(rt->rt6i_flags & RTF_GATEWAY))
-                       continue;
-               if (fl6->flowi6_oif != rt->dst.dev->ifindex)
-                       continue;
-               if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
-                       continue;
-               break;
+       if (optlen < 0) {
+               net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
+               return;
        }
 
-       if (!rt)
-               rt = net->ipv6.ip6_null_entry;
-       BACKTRACK(net, &fl6->saddr);
-out:
-       dst_hold(&rt->dst);
-
-       read_unlock_bh(&table->tb6_lock);
-
-       return rt;
-};
+       msg = (struct rd_msg *)icmp6_hdr(skb);
 
-static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
-                                          const struct in6_addr *src,
-                                          const struct in6_addr *gateway,
-                                          struct net_device *dev)
-{
-       int flags = RT6_LOOKUP_F_HAS_SADDR;
-       struct net *net = dev_net(dev);
-       struct ip6rd_flowi rdfl = {
-               .fl6 = {
-                       .flowi6_oif = dev->ifindex,
-                       .daddr = *dest,
-                       .saddr = *src,
-               },
-       };
+       if (ipv6_addr_is_multicast(&msg->dest)) {
+               net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
+               return;
+       }
 
-       rdfl.gateway = *gateway;
+       on_link = 0;
+       if (ipv6_addr_equal(&msg->dest, &msg->target)) {
+               on_link = 1;
+       } else if (ipv6_addr_type(&msg->target) !=
+                  (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
+               net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
+               return;
+       }
 
-       if (rt6_need_strict(dest))
-               flags |= RT6_LOOKUP_F_IFACE;
+       in6_dev = __in6_dev_get(skb->dev);
+       if (!in6_dev)
+               return;
+       if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
+               return;
 
-       return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
-                                                  flags, __ip6_route_redirect);
-}
+       /* RFC2461 8.1:
+        *      The IP source address of the Redirect MUST be the same as the current
+        *      first-hop router for the specified ICMP Destination Address.
+        */
 
-void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
-                 const struct in6_addr *saddr,
-                 struct neighbour *neigh, u8 *lladdr, int on_link)
-{
-       struct rt6_info *rt, *nrt = NULL;
-       struct netevent_redirect netevent;
-       struct net *net = dev_net(neigh->dev);
+       if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
+               net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
+               return;
+       }
 
-       rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+       lladdr = NULL;
+       if (ndopts.nd_opts_tgt_lladdr) {
+               lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
+                                            skb->dev);
+               if (!lladdr) {
+                       net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
+                       return;
+               }
+       }
 
+       rt = (struct rt6_info *) dst;
        if (rt == net->ipv6.ip6_null_entry) {
-               if (net_ratelimit())
-                       printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
-                              "for redirect target\n");
-               goto out;
+               net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
+               return;
        }
 
+       /* Redirect received -> path was valid.
+        * Look, redirects are sent only in response to data packets,
+        * so that this nexthop apparently is reachable. --ANK
+        */
+       dst_confirm(&rt->dst);
+
+       neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
+       if (!neigh)
+               return;
+
        /*
         *      We have finally decided to accept it.
         */
@@ -1660,18 +1726,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
                                     NEIGH_UPDATE_F_ISROUTER))
                     );
 
-       /*
-        * Redirect received -> path was valid.
-        * Look, redirects are sent only in response to data packets,
-        * so that this nexthop apparently is reachable. --ANK
-        */
-       dst_confirm(&rt->dst);
-
-       /* Duplicate redirect: silently ignore. */
-       if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
-               goto out;
-
-       nrt = ip6_rt_copy(rt, dest);
+       nrt = ip6_rt_copy(rt, &msg->dest);
        if (!nrt)
                goto out;
 
@@ -1680,132 +1735,23 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
                nrt->rt6i_flags &= ~RTF_GATEWAY;
 
        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
-       dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
 
        if (ip6_ins_rt(nrt))
                goto out;
 
        netevent.old = &rt->dst;
        netevent.new = &nrt->dst;
+       netevent.daddr = &msg->dest;
+       netevent.neigh = neigh;
        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
        if (rt->rt6i_flags & RTF_CACHE) {
+               rt = (struct rt6_info *) dst_clone(&rt->dst);
                ip6_del_rt(rt);
-               return;
-       }
-
-out:
-       dst_release(&rt->dst);
-}
-
-/*
- *     Handle ICMP "packet too big" messages
- *     i.e. Path MTU discovery
- */
-
-static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
-                            struct net *net, u32 pmtu, int ifindex)
-{
-       struct rt6_info *rt, *nrt;
-       int allfrag = 0;
-again:
-       rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
-       if (!rt)
-               return;
-
-       if (rt6_check_expired(rt)) {
-               ip6_del_rt(rt);
-               goto again;
-       }
-
-       if (pmtu >= dst_mtu(&rt->dst))
-               goto out;
-
-       if (pmtu < IPV6_MIN_MTU) {
-               /*
-                * According to RFC2460, PMTU is set to the IPv6 Minimum Link
-                * MTU (1280) and a fragment header should always be included
-                * after a node receiving Too Big message reporting PMTU is
-                * less than the IPv6 Minimum Link MTU.
-                */
-               pmtu = IPV6_MIN_MTU;
-               allfrag = 1;
        }
 
-       /* New mtu received -> path was valid.
-          They are sent only in response to data packets,
-          so that this nexthop apparently is reachable. --ANK
-        */
-       dst_confirm(&rt->dst);
-
-       /* Host route. If it is static, it would be better
-          not to override it, but add new one, so that
-          when cache entry will expire old pmtu
-          would return automatically.
-        */
-       if (rt->rt6i_flags & RTF_CACHE) {
-               dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
-               if (allfrag) {
-                       u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
-                       features |= RTAX_FEATURE_ALLFRAG;
-                       dst_metric_set(&rt->dst, RTAX_FEATURES, features);
-               }
-               rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
-               rt->rt6i_flags |= RTF_MODIFIED;
-               goto out;
-       }
-
-       /* Network route.
-          Two cases are possible:
-          1. It is connected route. Action: COW
-          2. It is gatewayed route or NONEXTHOP route. Action: clone it.
-        */
-       if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
-               nrt = rt6_alloc_cow(rt, daddr, saddr);
-       else
-               nrt = rt6_alloc_clone(rt, daddr);
-
-       if (nrt) {
-               dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
-               if (allfrag) {
-                       u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
-                       features |= RTAX_FEATURE_ALLFRAG;
-                       dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
-               }
-
-               /* According to RFC 1981, detecting PMTU increase shouldn't be
-                * happened within 5 mins, the recommended timer is 10 mins.
-                * Here this route expiration time is set to ip6_rt_mtu_expires
-                * which is 10 mins. After 10 mins the decreased pmtu is expired
-                * and detecting PMTU increase will be automatically happened.
-                */
-               rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
-               nrt->rt6i_flags |= RTF_DYNAMIC;
-               ip6_ins_rt(nrt);
-       }
 out:
-       dst_release(&rt->dst);
-}
-
-void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
-                       struct net_device *dev, u32 pmtu)
-{
-       struct net *net = dev_net(dev);
-
-       /*
-        * RFC 1981 states that a node "MUST reduce the size of the packets it
-        * is sending along the path" that caused the Packet Too Big message.
-        * Since it's not possible in the general case to determine which
-        * interface was used to send the original packet, we update the MTU
-        * on the interface that will be used to send future packets. We also
-        * update the MTU on the interface that received the Packet Too Big in
-        * case the original packet was forced out that interface with
-        * SO_BINDTODEVICE or similar. This is the next best thing to the
-        * correct behaviour, which would be to update the MTU on all
-        * interfaces.
-        */
-       rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
-       rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
+       neigh_release(neigh);
 }
 
 /*
@@ -1816,8 +1762,8 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
                                    const struct in6_addr *dest)
 {
        struct net *net = dev_net(ort->dst.dev);
-       struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-                                           ort->dst.dev, 0);
+       struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
+                                           ort->rt6i_table);
 
        if (rt) {
                rt->dst.input = ort->dst.input;
@@ -1838,8 +1784,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
                if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
                    (RTF_DEFAULT | RTF_ADDRCONF))
                        rt6_set_from(rt, ort);
-               else
-                       rt6_clean_expires(rt);
                rt->rt6i_metric = 0;
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -1864,7 +1808,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
        if (!table)
                return NULL;
 
-       write_lock_bh(&table->tb6_lock);
+       read_lock_bh(&table->tb6_lock);
        fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
        if (!fn)
                goto out;
@@ -1880,7 +1824,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
                break;
        }
 out:
-       write_unlock_bh(&table->tb6_lock);
+       read_unlock_bh(&table->tb6_lock);
        return rt;
 }
 
@@ -1896,7 +1840,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
                .fc_dst_len     = prefixlen,
                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
                                  RTF_UP | RTF_PREF(pref),
-               .fc_nlinfo.pid = 0,
+               .fc_nlinfo.portid = 0,
                .fc_nlinfo.nlh = NULL,
                .fc_nlinfo.nl_net = net,
        };
@@ -1923,7 +1867,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
        if (!table)
                return NULL;
 
-       write_lock_bh(&table->tb6_lock);
+       read_lock_bh(&table->tb6_lock);
        for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
                if (dev == rt->dst.dev &&
                    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
@@ -1932,7 +1876,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
        }
        if (rt)
                dst_hold(&rt->dst);
-       write_unlock_bh(&table->tb6_lock);
+       read_unlock_bh(&table->tb6_lock);
        return rt;
 }
 
@@ -1946,7 +1890,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
                .fc_ifindex     = dev->ifindex,
                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
-               .fc_nlinfo.pid = 0,
+               .fc_nlinfo.portid = 0,
                .fc_nlinfo.nlh = NULL,
                .fc_nlinfo.nl_net = dev_net(dev),
        };
@@ -1971,7 +1915,8 @@ void rt6_purge_dflt_routers(struct net *net)
 restart:
        read_lock_bh(&table->tb6_lock);
        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
-               if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
+               if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
+                   (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
                        dst_hold(&rt->dst);
                        read_unlock_bh(&table->tb6_lock);
                        ip6_del_rt(rt);
@@ -2011,7 +1956,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
        switch(cmd) {
        case SIOCADDRT:         /* Add a route */
        case SIOCDELRT:         /* Delete a route */
-               if (!capable(CAP_NET_ADMIN))
+               if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EPERM;
                err = copy_from_user(&rtmsg, arg,
                                     sizeof(struct in6_rtmsg));
@@ -2101,14 +2046,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
                                    bool anycast)
 {
        struct net *net = dev_net(idev->dev);
-       struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-                                           net->loopback_dev, 0);
-       int err;
+       struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
 
        if (!rt) {
-               if (net_ratelimit())
-                       pr_warning("IPv6:  Maximum number of routes reached,"
-                                  " consider increasing route/max_size.\n");
+               net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
                return ERR_PTR(-ENOMEM);
        }
 
@@ -2118,18 +2059,12 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
        rt->dst.input = ip6_input;
        rt->dst.output = ip6_output;
        rt->rt6i_idev = idev;
-       rt->dst.obsolete = -1;
 
        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
        if (anycast)
                rt->rt6i_flags |= RTF_ANYCAST;
        else
                rt->rt6i_flags |= RTF_LOCAL;
-       err = rt6_bind_neighbour(rt, rt->dst.dev);
-       if (err) {
-               dst_free(&rt->dst);
-               return ERR_PTR(err);
-       }
 
        rt->rt6i_dst.addr = *addr;
        rt->rt6i_dst.plen = 128;
@@ -2277,6 +2212,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
        [RTA_IIF]               = { .type = NLA_U32 },
        [RTA_PRIORITY]          = { .type = NLA_U32 },
        [RTA_METRICS]           = { .type = NLA_NESTED },
+       [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2299,14 +2235,18 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
        cfg->fc_src_len = rtm->rtm_src_len;
        cfg->fc_flags = RTF_UP;
        cfg->fc_protocol = rtm->rtm_protocol;
+       cfg->fc_type = rtm->rtm_type;
 
-       if (rtm->rtm_type == RTN_UNREACHABLE)
+       if (rtm->rtm_type == RTN_UNREACHABLE ||
+           rtm->rtm_type == RTN_BLACKHOLE ||
+           rtm->rtm_type == RTN_PROHIBIT ||
+           rtm->rtm_type == RTN_THROW)
                cfg->fc_flags |= RTF_REJECT;
 
        if (rtm->rtm_type == RTN_LOCAL)
                cfg->fc_flags |= RTF_LOCAL;
 
-       cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+       cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
        cfg->fc_nlinfo.nlh = nlh;
        cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
 
@@ -2350,12 +2290,72 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
        if (tb[RTA_TABLE])
                cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
 
+       if (tb[RTA_MULTIPATH]) {
+               cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
+               cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
+       }
+
        err = 0;
 errout:
        return err;
 }
 
-static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+/* Add (@add != 0) or delete (@add == 0) one route per nexthop found in the
+ * RTA_MULTIPATH payload referenced by cfg->fc_mp / cfg->fc_mp_len.
+ *
+ * Each nexthop is applied to a private copy of @cfg; a non-zero rtnh_ifindex
+ * and an RTA_GATEWAY attribute, when present, override the copied values.
+ *
+ * Delete: every nexthop is tried; the last error seen (or 0) is returned.
+ * Add: on the first failure, only the nexthops this call has already added
+ * are removed again and the error of the failed add is returned.
+ *
+ * Side effect: NLM_F_EXCL is cleared in cfg->fc_nlinfo.nlh->nlmsg_flags.
+ */
+static int ip6_route_multipath(struct fib6_config *cfg, int add)
+{
+       struct fib6_config r_cfg;
+       struct rtnexthop *rtnh;
+       int remaining;
+       int attrlen;
+       int err = 0, last_err = 0;
+       int done;               /* nexthops handled in the current pass */
+       int limit = -1;         /* >= 0: rollback pass over that many nexthops */
+
+beginning:
+       rtnh = (struct rtnexthop *)cfg->fc_mp;
+       remaining = cfg->fc_mp_len;
+       done = 0;
+
+       /* Parse a Multipath Entry */
+       while (done != limit && rtnh_ok(rtnh, remaining)) {
+               memcpy(&r_cfg, cfg, sizeof(*cfg));
+               if (rtnh->rtnh_ifindex)
+                       r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+               err = 0;
+               attrlen = rtnh_attrlen(rtnh);
+               if (attrlen > 0) {
+                       struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+                       nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+                       if (nla &&
+                           nla_len(nla) < (int)sizeof(struct in6_addr)) {
+                               /* Too short to hold an IPv6 address */
+                               err = -EINVAL;
+                       } else if (nla) {
+                               nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+                               r_cfg.fc_flags |= RTF_GATEWAY;
+                       }
+               }
+               if (!err)
+                       err = add ? ip6_route_add(&r_cfg) :
+                                   ip6_route_del(&r_cfg);
+               /* Errors during a rollback pass (limit >= 0) are ignored so
+                * that the error of the failed add is what gets reported.
+                */
+               if (err && limit < 0) {
+                       last_err = err;
+                       /* If we are trying to remove a route, do not stop the
+                        * loop when ip6_route_del() fails (because next hop is
+                        * already gone), we should try to remove all next hops.
+                        */
+                       if (add) {
+                               /* If add fails, delete the next hops that
+                                * have been already added - and only those:
+                                * the rollback pass stops after 'done'
+                                * entries, i.e. before the one that failed.
+                                */
+                               add = 0;
+                               limit = done;
+                               goto beginning;
+                       }
+               }
+               /* Because each route is added like a single route we remove
+                * this flag after the first nexthop (if there is a collision,
+                * we have already failed to add the first nexthop:
+                * fib6_add_rt2node() has rejected it).
+                */
+               cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
+               rtnh = rtnh_next(rtnh, &remaining);
+               done++;
+       }
+
+       return last_err;
+}
+
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
 {
        struct fib6_config cfg;
        int err;
@@ -2364,10 +2364,13 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
        if (err < 0)
                return err;
 
-       return ip6_route_del(&cfg);
+       if (cfg.fc_mp)
+               return ip6_route_multipath(&cfg, 0);
+       else
+               return ip6_route_del(&cfg);
 }
 
-static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
 {
        struct fib6_config cfg;
        int err;
@@ -2376,7 +2379,10 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
        if (err < 0)
                return err;
 
-       return ip6_route_add(&cfg);
+       if (cfg.fc_mp)
+               return ip6_route_multipath(&cfg, 1);
+       else
+               return ip6_route_add(&cfg);
 }
 
 static inline size_t rt6_nlmsg_size(void)
@@ -2397,16 +2403,13 @@ static inline size_t rt6_nlmsg_size(void)
 static int rt6_fill_node(struct net *net,
                         struct sk_buff *skb, struct rt6_info *rt,
                         struct in6_addr *dst, struct in6_addr *src,
-                        int iif, int type, u32 pid, u32 seq,
+                        int iif, int type, u32 portid, u32 seq,
                         int prefix, int nowait, unsigned int flags)
 {
-       const struct inet_peer *peer;
        struct rtmsg *rtm;
        struct nlmsghdr *nlh;
        long expires;
        u32 table;
-       struct neighbour *n;
-       u32 ts, tsage;
 
        if (prefix) {   /* user wants prefix routes only */
                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2415,7 +2418,7 @@ static int rt6_fill_node(struct net *net,
                }
        }
 
-       nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
+       nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
        if (!nlh)
                return -EMSGSIZE;
 
@@ -2431,8 +2434,22 @@ static int rt6_fill_node(struct net *net,
        rtm->rtm_table = table;
        if (nla_put_u32(skb, RTA_TABLE, table))
                goto nla_put_failure;
-       if (rt->rt6i_flags & RTF_REJECT)
-               rtm->rtm_type = RTN_UNREACHABLE;
+       if (rt->rt6i_flags & RTF_REJECT) {
+               switch (rt->dst.error) {
+               case -EINVAL:
+                       rtm->rtm_type = RTN_BLACKHOLE;
+                       break;
+               case -EACCES:
+                       rtm->rtm_type = RTN_PROHIBIT;
+                       break;
+               case -EAGAIN:
+                       rtm->rtm_type = RTN_THROW;
+                       break;
+               default:
+                       rtm->rtm_type = RTN_UNREACHABLE;
+                       break;
+               }
+       }
        else if (rt->rt6i_flags & RTF_LOCAL)
                rtm->rtm_type = RTN_LOCAL;
        else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
@@ -2444,10 +2461,12 @@ static int rt6_fill_node(struct net *net,
        rtm->rtm_protocol = rt->rt6i_protocol;
        if (rt->rt6i_flags & RTF_DYNAMIC)
                rtm->rtm_protocol = RTPROT_REDIRECT;
-       else if (rt->rt6i_flags & RTF_ADDRCONF)
-               rtm->rtm_protocol = RTPROT_KERNEL;
-       else if (rt->rt6i_flags & RTF_DEFAULT)
-               rtm->rtm_protocol = RTPROT_RA;
+       else if (rt->rt6i_flags & RTF_ADDRCONF) {
+               if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
+                       rtm->rtm_protocol = RTPROT_RA;
+               else
+                       rtm->rtm_protocol = RTPROT_KERNEL;
+       }
 
        if (rt->rt6i_flags & RTF_CACHE)
                rtm->rtm_flags |= RTM_F_CLONED;
@@ -2503,37 +2522,20 @@ static int rt6_fill_node(struct net *net,
        if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
                goto nla_put_failure;
 
-       rcu_read_lock();
-       n = dst_get_neighbour_noref(&rt->dst);
-       if (n) {
-               if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
-                       rcu_read_unlock();
+       if (rt->rt6i_flags & RTF_GATEWAY) {
+               if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
                        goto nla_put_failure;
-               }
        }
-       rcu_read_unlock();
 
        if (rt->dst.dev &&
            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
                goto nla_put_failure;
        if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
                goto nla_put_failure;
-       if (!(rt->rt6i_flags & RTF_EXPIRES))
-               expires = 0;
-       else if (rt->dst.expires - jiffies < INT_MAX)
-               expires = rt->dst.expires - jiffies;
-       else
-               expires = INT_MAX;
 
-       peer = rt->rt6i_peer;
-       ts = tsage = 0;
-       if (peer && peer->tcp_ts_stamp) {
-               ts = peer->tcp_ts;
-               tsage = get_seconds() - peer->tcp_ts_stamp;
-       }
+       expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
 
-       if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
-                              expires, rt->dst.error) < 0)
+       if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
                goto nla_put_failure;
 
        return nlmsg_end(skb, nlh);
@@ -2556,11 +2558,11 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
        return rt6_fill_node(arg->net,
                     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
-                    NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
+                    NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
                     prefix, 0, NLM_F_MULTI);
 }
 
-static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
 {
        struct net *net = sock_net(in_skb->sk);
        struct nlattr *tb[RTA_MAX+1];
@@ -2622,7 +2624,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 
        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb) {
-               dst_release(&rt->dst);
+               ip6_rt_put(rt);
                err = -ENOBUFS;
                goto errout;
        }
@@ -2636,14 +2638,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
        skb_dst_set(skb, &rt->dst);
 
        err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
-                           RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
+                           RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
                            nlh->nlmsg_seq, 0, 0, 0);
        if (err < 0) {
                kfree_skb(skb);
                goto errout;
        }
 
-       err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+       err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
 errout:
        return err;
 }
@@ -2663,14 +2665,14 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
                goto errout;
 
        err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
-                               event, info->pid, seq, 0, 0, 0);
+                               event, info->portid, seq, 0, 0, 0);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
                goto errout;
        }
-       rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
+       rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
                    info->nlh, gfp_any());
        return;
 errout:
@@ -2716,7 +2718,6 @@ struct rt6_proc_arg
 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 {
        struct seq_file *m = p_arg;
-       struct neighbour *n;
 
        seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
 
@@ -2725,14 +2726,11 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 #else
        seq_puts(m, "00000000000000000000000000000000 00 ");
 #endif
-       rcu_read_lock();
-       n = dst_get_neighbour_noref(&rt->dst);
-       if (n) {
-               seq_printf(m, "%pi6", n->primary_key);
+       if (rt->rt6i_flags & RTF_GATEWAY) {
+               seq_printf(m, "%pi6", &rt->rt6i_gateway);
        } else {
                seq_puts(m, "00000000000000000000000000000000");
        }
-       rcu_read_unlock();
        seq_printf(m, " %08x %08x %08x %08x %8s\n",
                   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
                   rt->dst.__use, rt->rt6i_flags,
@@ -2901,6 +2899,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
                table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
                table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
                table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+
+               /* Don't export sysctls to unprivileged users */
+               if (net->user_ns != &init_user_ns)
+                       table[0].procname = NULL;
        }
 
        return table;
@@ -2961,10 +2963,6 @@ static int __net_init ip6_route_net_init(struct net *net)
        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
 
-#ifdef CONFIG_PROC_FS
-       proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
-       proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
-#endif
        net->ipv6.ip6_rt_gc_expire = 30*HZ;
 
        ret = 0;
@@ -2985,10 +2983,6 @@ out_ip6_dst_ops:
 
 static void __net_exit ip6_route_net_exit(struct net *net)
 {
-#ifdef CONFIG_PROC_FS
-       proc_net_remove(net, "ipv6_route");
-       proc_net_remove(net, "rt6_stats");
-#endif
        kfree(net->ipv6.ip6_null_entry);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
        kfree(net->ipv6.ip6_prohibit_entry);
@@ -2997,11 +2991,58 @@ static void __net_exit ip6_route_net_exit(struct net *net)
        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
 }
 
+static int __net_init ip6_route_net_init_late(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+       proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
+       proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
+#endif
+       return 0;
+}
+
+static void __net_exit ip6_route_net_exit_late(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+       remove_proc_entry("ipv6_route", net->proc_net);
+       remove_proc_entry("rt6_stats", net->proc_net);
+#endif
+}
+
 static struct pernet_operations ip6_route_net_ops = {
        .init = ip6_route_net_init,
        .exit = ip6_route_net_exit,
 };
 
+static int __net_init ipv6_inetpeer_init(struct net *net)
+{
+       struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
+
+       if (!bp)
+               return -ENOMEM;
+       inet_peer_base_init(bp);
+       net->ipv6.peers = bp;
+       return 0;
+}
+
+static void __net_exit ipv6_inetpeer_exit(struct net *net)
+{
+       struct inet_peer_base *bp = net->ipv6.peers;
+
+       net->ipv6.peers = NULL;
+       inetpeer_invalidate_tree(bp);
+       kfree(bp);
+}
+
+static struct pernet_operations ipv6_inetpeer_ops = {
+       .init   =       ipv6_inetpeer_init,
+       .exit   =       ipv6_inetpeer_exit,
+};
+
+static struct pernet_operations ip6_route_net_late_ops = {
+       .init = ip6_route_net_init_late,
+       .exit = ip6_route_net_exit_late,
+};
+
 static struct notifier_block ip6_route_dev_notifier = {
        .notifier_call = ip6_route_dev_notify,
        .priority = 0,
@@ -3022,10 +3063,14 @@ int __init ip6_route_init(void)
        if (ret)
                goto out_kmem_cache;
 
-       ret = register_pernet_subsys(&ip6_route_net_ops);
+       ret = register_pernet_subsys(&ipv6_inetpeer_ops);
        if (ret)
                goto out_dst_entries;
 
+       ret = register_pernet_subsys(&ip6_route_net_ops);
+       if (ret)
+               goto out_register_inetpeer;
+
        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
 
        /* Registering of the loopback is done before this portion of code,
@@ -3051,19 +3096,25 @@ int __init ip6_route_init(void)
        if (ret)
                goto xfrm6_init;
 
+       ret = register_pernet_subsys(&ip6_route_net_late_ops);
+       if (ret)
+               goto fib6_rules_init;
+
        ret = -ENOBUFS;
        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
-               goto fib6_rules_init;
+               goto out_register_late_subsys;
 
        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
        if (ret)
-               goto fib6_rules_init;
+               goto out_register_late_subsys;
 
 out:
        return ret;
 
+out_register_late_subsys:
+       unregister_pernet_subsys(&ip6_route_net_late_ops);
 fib6_rules_init:
        fib6_rules_cleanup();
 xfrm6_init:
@@ -3072,6 +3123,8 @@ out_fib6_init:
        fib6_gc_cleanup();
 out_register_subsys:
        unregister_pernet_subsys(&ip6_route_net_ops);
+out_register_inetpeer:
+       unregister_pernet_subsys(&ipv6_inetpeer_ops);
 out_dst_entries:
        dst_entries_destroy(&ip6_dst_blackhole_ops);
 out_kmem_cache:
@@ -3082,9 +3135,11 @@ out_kmem_cache:
 void ip6_route_cleanup(void)
 {
        unregister_netdevice_notifier(&ip6_route_dev_notifier);
+       unregister_pernet_subsys(&ip6_route_net_late_ops);
        fib6_rules_cleanup();
        xfrm6_fini();
        fib6_gc_cleanup();
+       unregister_pernet_subsys(&ipv6_inetpeer_ops);
        unregister_pernet_subsys(&ip6_route_net_ops);
        dst_entries_destroy(&ip6_dst_blackhole_ops);
        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);