]> nv-tegra.nvidia Code Review - linux-3.10.git/blobdiff - net/ipv6/datagram.c
ipv6,mcast: always hold idev->lock before mca_lock
[linux-3.10.git] / net / ipv6 / datagram.c
index cc518405b3e1efc29b87aea3cadb4e0c6d60068f..4b56cbbc789062d89c5a708386a03e5a8ded8bc6 100644 (file)
@@ -1,11 +1,9 @@
 /*
  *     common UDP/RAW code
- *     Linux INET6 implementation 
+ *     Linux INET6 implementation
  *
  *     Authors:
- *     Pedro Roque             <roque@di.fc.ul.pt>     
- *
- *     $Id: datagram.c,v 1.24 2002/02/01 22:01:04 davem Exp $
+ *     Pedro Roque             <roque@di.fc.ul.pt>
  *
  *     This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
  *      2 of the License, or (at your option) any later version.
  */
 
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/in6.h>
 #include <linux/ipv6.h>
 #include <linux/route.h>
+#include <linux/slab.h>
+#include <linux/export.h>
 
 #include <net/ipv6.h>
 #include <net/ndisc.h>
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
+#include <net/dsfield.h>
 
 #include <linux/errqueue.h>
 #include <asm/uaccess.h>
 
+static bool ipv6_mapped_addr_any(const struct in6_addr *a)
+{
+       return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
+}
+
 int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
        struct sockaddr_in6     *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock        *inet = inet_sk(sk);
        struct ipv6_pinfo       *np = inet6_sk(sk);
-       struct in6_addr         *daddr, *final_p = NULL, final;
+       struct in6_addr         *daddr, *final_p, final;
        struct dst_entry        *dst;
-       struct flowi            fl;
+       struct flowi6           fl6;
        struct ip6_flowlabel    *flowlabel = NULL;
+       struct ipv6_txoptions   *opt;
        int                     addr_type;
        int                     err;
 
@@ -54,19 +61,19 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        }
 
        if (addr_len < SIN6_LEN_RFC2133)
-               return -EINVAL;
+               return -EINVAL;
 
-       if (usin->sin6_family != AF_INET6) 
-               return -EAFNOSUPPORT;
+       if (usin->sin6_family != AF_INET6)
+               return -EAFNOSUPPORT;
 
-       memset(&fl, 0, sizeof(fl));
+       memset(&fl6, 0, sizeof(fl6));
        if (np->sndflow) {
-               fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-               if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
-                       flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+               fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+               if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+                       flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (flowlabel == NULL)
                                return -EINVAL;
-                       ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+                       usin->sin6_addr = flowlabel->dst;
                }
        }
 
@@ -92,29 +99,32 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                sin.sin_addr.s_addr = daddr->s6_addr32[3];
                sin.sin_port = usin->sin6_port;
 
-               err = ip4_datagram_connect(sk, 
-                                          (struct sockaddr*) &sin, 
+               err = ip4_datagram_connect(sk,
+                                          (struct sockaddr *) &sin,
                                           sizeof(sin));
 
 ipv4_connected:
                if (err)
                        goto out;
-               
-               ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr);
 
-               if (ipv6_addr_any(&np->saddr)) {
-                       ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000ffff),
-                                     inet->saddr);
-               }
+               ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
+
+               if (ipv6_addr_any(&np->saddr) ||
+                   ipv6_mapped_addr_any(&np->saddr))
+                       ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
 
-               if (ipv6_addr_any(&np->rcv_saddr)) {
-                       ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff),
-                                     inet->rcv_saddr);
+               if (ipv6_addr_any(&np->rcv_saddr) ||
+                   ipv6_mapped_addr_any(&np->rcv_saddr)) {
+                       ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
+                                              &np->rcv_saddr);
+                       if (sk->sk_prot->rehash)
+                               sk->sk_prot->rehash(sk);
                }
+
                goto out;
        }
 
-       if (addr_type&IPV6_ADDR_LINKLOCAL) {
+       if (__ipv6_addr_needs_scope_id(addr_type)) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        if (sk->sk_bound_dev_if &&
@@ -123,11 +133,11 @@ ipv4_connected:
                                goto out;
                        }
                        sk->sk_bound_dev_if = usin->sin6_scope_id;
-                       if (!sk->sk_bound_dev_if &&
-                           (addr_type & IPV6_ADDR_MULTICAST))
-                               fl.oif = np->mcast_oif;
                }
 
+               if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST))
+                       sk->sk_bound_dev_if = np->mcast_oif;
+
                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if) {
                        err = -EINVAL;
@@ -135,74 +145,72 @@ ipv4_connected:
                }
        }
 
-       ipv6_addr_copy(&np->daddr, daddr);
-       np->flow_label = fl.fl6_flowlabel;
+       np->daddr = *daddr;
+       np->flow_label = fl6.flowlabel;
 
-       inet->dport = usin->sin6_port;
+       inet->inet_dport = usin->sin6_port;
 
        /*
         *      Check for a route to destination an obtain the
         *      destination cache for it.
         */
 
-       fl.proto = sk->sk_protocol;
-       ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-       ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-       fl.oif = sk->sk_bound_dev_if;
-       fl.fl_ip_dport = inet->dport;
-       fl.fl_ip_sport = inet->sport;
-
-       if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
-               fl.oif = np->mcast_oif;
-
-       if (flowlabel) {
-               if (flowlabel->opt && flowlabel->opt->srcrt) {
-                       struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
-                       ipv6_addr_copy(&final, &fl.fl6_dst);
-                       ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-                       final_p = &final;
-               }
-       } else if (np->opt && np->opt->srcrt) {
-               struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-               ipv6_addr_copy(&final, &fl.fl6_dst);
-               ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-               final_p = &final;
-       }
+       fl6.flowi6_proto = sk->sk_protocol;
+       fl6.daddr = np->daddr;
+       fl6.saddr = np->saddr;
+       fl6.flowi6_oif = sk->sk_bound_dev_if;
+       fl6.flowi6_mark = sk->sk_mark;
+       fl6.fl6_dport = inet->inet_dport;
+       fl6.fl6_sport = inet->inet_sport;
 
-       err = ip6_dst_lookup(sk, &dst, &fl);
-       if (err)
-               goto out;
-       if (final_p)
-               ipv6_addr_copy(&fl.fl6_dst, final_p);
+       if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
+               fl6.flowi6_oif = np->mcast_oif;
 
-       if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+       security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+       opt = flowlabel ? flowlabel->opt : np->opt;
+       final_p = fl6_update_dst(&fl6, opt, &final);
+
+       dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
+       err = 0;
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
                goto out;
+       }
 
        /* source address lookup done in ip6_dst_lookup */
 
        if (ipv6_addr_any(&np->saddr))
-               ipv6_addr_copy(&np->saddr, &fl.fl6_src);
+               np->saddr = fl6.saddr;
 
        if (ipv6_addr_any(&np->rcv_saddr)) {
-               ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
-               inet->rcv_saddr = LOOPBACK4_IPV6;
+               np->rcv_saddr = fl6.saddr;
+               inet->inet_rcv_saddr = LOOPBACK4_IPV6;
+               if (sk->sk_prot->rehash)
+                       sk->sk_prot->rehash(sk);
        }
 
        ip6_dst_store(sk, dst,
-                     ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
-                     &np->daddr : NULL);
+                     ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
+                     &np->daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+                     ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
+                     &np->saddr :
+#endif
+                     NULL);
 
        sk->sk_state = TCP_ESTABLISHED;
 out:
        fl6_sock_release(flowlabel);
        return err;
 }
+EXPORT_SYMBOL_GPL(ip6_datagram_connect);
 
-void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, 
-                    u16 port, u32 info, u8 *payload)
+void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+                    __be16 port, u32 info, u8 *payload)
 {
        struct ipv6_pinfo *np  = inet6_sk(sk);
-       struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
+       struct icmp6hdr *icmph = icmp6_hdr(skb);
        struct sock_exterr_skb *serr;
 
        if (!np->recverr)
@@ -212,25 +220,28 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
        if (!skb)
                return;
 
+       skb->protocol = htons(ETH_P_IPV6);
+
        serr = SKB_EXT_ERR(skb);
        serr->ee.ee_errno = err;
        serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6;
-       serr->ee.ee_type = icmph->icmp6_type; 
+       serr->ee.ee_type = icmph->icmp6_type;
        serr->ee.ee_code = icmph->icmp6_code;
        serr->ee.ee_pad = 0;
        serr->ee.ee_info = info;
        serr->ee.ee_data = 0;
-       serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw;
+       serr->addr_offset = (u8 *)&(((struct ipv6hdr *)(icmph + 1))->daddr) -
+                                 skb_network_header(skb);
        serr->port = port;
 
-       skb->h.raw = payload;
        __skb_pull(skb, payload - skb->data);
+       skb_reset_transport_header(skb);
 
        if (sock_queue_err_skb(sk, skb))
                kfree_skb(skb);
 }
 
-void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
+void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sock_exterr_skb *serr;
@@ -244,29 +255,67 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
        if (!skb)
                return;
 
-       iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
-       skb->nh.ipv6h = iph;
-       ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+       skb->protocol = htons(ETH_P_IPV6);
+
+       skb_put(skb, sizeof(struct ipv6hdr));
+       skb_reset_network_header(skb);
+       iph = ipv6_hdr(skb);
+       iph->daddr = fl6->daddr;
 
        serr = SKB_EXT_ERR(skb);
        serr->ee.ee_errno = err;
        serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
-       serr->ee.ee_type = 0; 
+       serr->ee.ee_type = 0;
        serr->ee.ee_code = 0;
        serr->ee.ee_pad = 0;
        serr->ee.ee_info = info;
        serr->ee.ee_data = 0;
-       serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
-       serr->port = fl->fl_ip_dport;
+       serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
+       serr->port = fl6->fl6_dport;
 
-       skb->h.raw = skb->tail;
-       __skb_pull(skb, skb->tail - skb->data);
+       __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+       skb_reset_transport_header(skb);
 
        if (sock_queue_err_skb(sk, skb))
                kfree_skb(skb);
 }
 
-/* 
+void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
+{
+       struct ipv6_pinfo *np = inet6_sk(sk);
+       struct ipv6hdr *iph;
+       struct sk_buff *skb;
+       struct ip6_mtuinfo *mtu_info;
+
+       if (!np->rxopt.bits.rxpmtu)
+               return;
+
+       skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
+       if (!skb)
+               return;
+
+       skb_put(skb, sizeof(struct ipv6hdr));
+       skb_reset_network_header(skb);
+       iph = ipv6_hdr(skb);
+       iph->daddr = fl6->daddr;
+
+       mtu_info = IP6CBMTU(skb);
+
+       mtu_info->ip6m_mtu = mtu;
+       mtu_info->ip6m_addr.sin6_family = AF_INET6;
+       mtu_info->ip6m_addr.sin6_port = 0;
+       mtu_info->ip6m_addr.sin6_flowinfo = 0;
+       mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif;
+       mtu_info->ip6m_addr.sin6_addr = ipv6_hdr(skb)->daddr;
+
+       __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+       skb_reset_transport_header(skb);
+
+       skb = xchg(&np->rxpmtu, skb);
+       kfree_skb(skb);
+}
+
+/*
  *     Handle MSG_ERRQUEUE
  */
 int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
@@ -302,21 +351,23 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 
        sin = (struct sockaddr_in6 *)msg->msg_name;
        if (sin) {
+               const unsigned char *nh = skb_network_header(skb);
                sin->sin6_family = AF_INET6;
                sin->sin6_flowinfo = 0;
-               sin->sin6_port = serr->port; 
-               sin->sin6_scope_id = 0;
-               if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
-                       ipv6_addr_copy(&sin->sin6_addr,
-                         (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
+               sin->sin6_port = serr->port;
+               if (skb->protocol == htons(ETH_P_IPV6)) {
+                       const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
+                                                                 struct ipv6hdr, daddr);
+                       sin->sin6_addr = ip6h->daddr;
                        if (np->sndflow)
-                               sin->sin6_flowinfo = *(u32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
-                       if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-                               sin->sin6_scope_id = IP6CB(skb)->iif;
+                               sin->sin6_flowinfo = ip6_flowinfo(ip6h);
+                       sin->sin6_scope_id =
+                               ipv6_iface_scope_id(&sin->sin6_addr,
+                                                   IP6CB(skb)->iif);
                } else {
-                       ipv6_addr_set(&sin->sin6_addr, 0, 0,
-                                     htonl(0xffff),
-                                     *(u32*)(skb->nh.raw + serr->addr_offset));
+                       ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),
+                                              &sin->sin6_addr);
+                       sin->sin6_scope_id = 0;
                }
        }
 
@@ -326,19 +377,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
        if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
                sin->sin6_family = AF_INET6;
                sin->sin6_flowinfo = 0;
-               sin->sin6_scope_id = 0;
-               if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
-                       ipv6_addr_copy(&sin->sin6_addr, &skb->nh.ipv6h->saddr);
+               if (skb->protocol == htons(ETH_P_IPV6)) {
+                       sin->sin6_addr = ipv6_hdr(skb)->saddr;
                        if (np->rxopt.all)
-                               datagram_recv_ctl(sk, msg, skb);
-                       if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-                               sin->sin6_scope_id = IP6CB(skb)->iif;
+                               ip6_datagram_recv_ctl(sk, msg, skb);
+                       sin->sin6_scope_id =
+                               ipv6_iface_scope_id(&sin->sin6_addr,
+                                                   IP6CB(skb)->iif);
                } else {
                        struct inet_sock *inet = inet_sk(sk);
 
-                       ipv6_addr_set(&sin->sin6_addr, 0, 0,
-                                     htonl(0xffff),
-                                     skb->nh.iph->saddr);
+                       ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
+                                              &sin->sin6_addr);
+                       sin->sin6_scope_id = 0;
                        if (inet->cmsg_flags)
                                ip_cmsg_recv(msg, skb);
                }
@@ -362,45 +413,97 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
                spin_unlock_bh(&sk->sk_error_queue.lock);
        }
 
-out_free_skb:  
+out_free_skb:
        kfree_skb(skb);
 out:
        return err;
 }
+EXPORT_SYMBOL_GPL(ipv6_recv_error);
+
+/*
+ *     Handle IPV6_RECVPATHMTU
+ */
+int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
+{
+       struct ipv6_pinfo *np = inet6_sk(sk);
+       struct sk_buff *skb;
+       struct sockaddr_in6 *sin;
+       struct ip6_mtuinfo mtu_info;
+       int err;
+       int copied;
+
+       err = -EAGAIN;
+       skb = xchg(&np->rxpmtu, NULL);
+       if (skb == NULL)
+               goto out;
+
+       copied = skb->len;
+       if (copied > len) {
+               msg->msg_flags |= MSG_TRUNC;
+               copied = len;
+       }
+       err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+       if (err)
+               goto out_free_skb;
+
+       sock_recv_timestamp(msg, sk, skb);
+
+       memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info));
 
+       sin = (struct sockaddr_in6 *)msg->msg_name;
+       if (sin) {
+               sin->sin6_family = AF_INET6;
+               sin->sin6_flowinfo = 0;
+               sin->sin6_port = 0;
+               sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
+               sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr;
+       }
+
+       put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
+
+       err = copied;
 
+out_free_skb:
+       kfree_skb(skb);
+out:
+       return err;
+}
 
-int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
+
+int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+                         struct sk_buff *skb)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet6_skb_parm *opt = IP6CB(skb);
+       unsigned char *nh = skb_network_header(skb);
 
        if (np->rxopt.bits.rxinfo) {
                struct in6_pktinfo src_info;
 
                src_info.ipi6_ifindex = opt->iif;
-               ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+               src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
                put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
        }
 
        if (np->rxopt.bits.rxhlim) {
-               int hlim = skb->nh.ipv6h->hop_limit;
+               int hlim = ipv6_hdr(skb)->hop_limit;
                put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
        }
 
        if (np->rxopt.bits.rxtclass) {
-               int tclass = (ntohl(*(u32 *)skb->nh.ipv6h) >> 20) & 0xff;
+               int tclass = ipv6_get_dsfield(ipv6_hdr(skb));
                put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
        }
 
-       if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
-               u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
-               put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
+       if (np->rxopt.bits.rxflow) {
+               __be32 flowinfo = ip6_flowinfo((struct ipv6hdr *)nh);
+               if (flowinfo)
+                       put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
        }
 
        /* HbH is allowed only once */
        if (np->rxopt.bits.hopopts && opt->hop) {
-               u8 *ptr = skb->nh.raw + opt->hop;
+               u8 *ptr = nh + opt->hop;
                put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
        }
 
@@ -411,18 +514,18 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
                 * report extension headers (except for HbH)
                 * in order.
                 *
-                * Also note that IPV6_RECVRTHDRDSTOPTS is NOT 
+                * Also note that IPV6_RECVRTHDRDSTOPTS is NOT
                 * (and WILL NOT be) defined because
                 * IPV6_RECVDSTOPTS is more generic. --yoshfuji
                 */
                unsigned int off = sizeof(struct ipv6hdr);
-               u8 nexthdr = skb->nh.ipv6h->nexthdr;
+               u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 
                while (off <= opt->lastopt) {
-                       unsigned len;
-                       u8 *ptr = skb->nh.raw + off;
+                       unsigned int len;
+                       u8 *ptr = nh + off;
 
-                       switch(nexthdr) {
+                       switch (nexthdr) {
                        case IPPROTO_DSTOPTS:
                                nexthdr = ptr[0];
                                len = (ptr[1] + 1) << 3;
@@ -437,7 +540,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
                                break;
                        case IPPROTO_AH:
                                nexthdr = ptr[0];
-                               len = (ptr[1] + 1) << 2;
+                               len = (ptr[1] + 2) << 2;
                                break;
                        default:
                                nexthdr = ptr[0];
@@ -454,35 +557,58 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
                struct in6_pktinfo src_info;
 
                src_info.ipi6_ifindex = opt->iif;
-               ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+               src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
                put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
        }
        if (np->rxopt.bits.rxohlim) {
-               int hlim = skb->nh.ipv6h->hop_limit;
+               int hlim = ipv6_hdr(skb)->hop_limit;
                put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
        }
        if (np->rxopt.bits.ohopopts && opt->hop) {
-               u8 *ptr = skb->nh.raw + opt->hop;
+               u8 *ptr = nh + opt->hop;
                put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
        }
        if (np->rxopt.bits.odstopts && opt->dst0) {
-               u8 *ptr = skb->nh.raw + opt->dst0;
+               u8 *ptr = nh + opt->dst0;
                put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
        }
        if (np->rxopt.bits.osrcrt && opt->srcrt) {
-               struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
+               struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt);
                put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
        }
        if (np->rxopt.bits.odstopts && opt->dst1) {
-               u8 *ptr = skb->nh.raw + opt->dst1;
+               u8 *ptr = nh + opt->dst1;
                put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
        }
+       if (np->rxopt.bits.rxorigdstaddr) {
+               struct sockaddr_in6 sin6;
+               __be16 *ports = (__be16 *) skb_transport_header(skb);
+
+               if (skb_transport_offset(skb) + 4 <= skb->len) {
+                       /* All current transport protocols have the port numbers in the
+                        * first four bytes of the transport header and this function is
+                        * written with this assumption in mind.
+                        */
+
+                       sin6.sin6_family = AF_INET6;
+                       sin6.sin6_addr = ipv6_hdr(skb)->daddr;
+                       sin6.sin6_port = ports[1];
+                       sin6.sin6_flowinfo = 0;
+                       sin6.sin6_scope_id =
+                               ipv6_iface_scope_id(&ipv6_hdr(skb)->daddr,
+                                                   opt->iif);
+
+                       put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
+               }
+       }
        return 0;
 }
+EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
 
-int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
-                     struct ipv6_txoptions *opt,
-                     int *hlimit, int *tclass)
+int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
+                         struct msghdr *msg, struct flowi6 *fl6,
+                         struct ipv6_txoptions *opt,
+                         int *hlimit, int *tclass, int *dontfrag)
 {
        struct in6_pktinfo *src_info;
        struct cmsghdr *cmsg;
@@ -493,7 +619,6 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 
        for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
                int addr_type;
-               struct net_device *dev = NULL;
 
                if (!CMSG_OK(msg, cmsg)) {
                        err = -EINVAL;
@@ -504,65 +629,75 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
                        continue;
 
                switch (cmsg->cmsg_type) {
-               case IPV6_PKTINFO:
-               case IPV6_2292PKTINFO:
-                       if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
+               case IPV6_PKTINFO:
+               case IPV6_2292PKTINFO:
+                   {
+                       struct net_device *dev = NULL;
+
+                       if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
                                err = -EINVAL;
                                goto exit_f;
                        }
 
                        src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
-                       
+
                        if (src_info->ipi6_ifindex) {
-                               if (fl->oif && src_info->ipi6_ifindex != fl->oif)
+                               if (fl6->flowi6_oif &&
+                                   src_info->ipi6_ifindex != fl6->flowi6_oif)
                                        return -EINVAL;
-                               fl->oif = src_info->ipi6_ifindex;
+                               fl6->flowi6_oif = src_info->ipi6_ifindex;
                        }
 
-                       addr_type = ipv6_addr_type(&src_info->ipi6_addr);
+                       addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
 
-                       if (addr_type == IPV6_ADDR_ANY)
-                               break;
-                       
-                       if (addr_type & IPV6_ADDR_LINKLOCAL) {
-                               if (!src_info->ipi6_ifindex)
-                                       return -EINVAL;
-                               else {
-                                       dev = dev_get_by_index(src_info->ipi6_ifindex);
-                                       if (!dev)
-                                               return -ENODEV;
+                       rcu_read_lock();
+                       if (fl6->flowi6_oif) {
+                               dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
+                               if (!dev) {
+                                       rcu_read_unlock();
+                                       return -ENODEV;
                                }
+                       } else if (addr_type & IPV6_ADDR_LINKLOCAL) {
+                               rcu_read_unlock();
+                               return -EINVAL;
                        }
-                       if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) {
-                               if (dev)
-                                       dev_put(dev);
-                               err = -EINVAL;
-                               goto exit_f;
+
+                       if (addr_type != IPV6_ADDR_ANY) {
+                               int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
+                               if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
+                                   !ipv6_chk_addr(net, &src_info->ipi6_addr,
+                                                  strict ? dev : NULL, 0))
+                                       err = -EINVAL;
+                               else
+                                       fl6->saddr = src_info->ipi6_addr;
                        }
-                       if (dev)
-                               dev_put(dev);
 
-                       ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
+                       rcu_read_unlock();
+
+                       if (err)
+                               goto exit_f;
+
                        break;
+                   }
 
                case IPV6_FLOWINFO:
-                        if (cmsg->cmsg_len < CMSG_LEN(4)) {
+                       if (cmsg->cmsg_len < CMSG_LEN(4)) {
                                err = -EINVAL;
                                goto exit_f;
                        }
 
-                       if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) {
-                               if ((fl->fl6_flowlabel^*(u32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
+                       if (fl6->flowlabel&IPV6_FLOWINFO_MASK) {
+                               if ((fl6->flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
                                        err = -EINVAL;
                                        goto exit_f;
                                }
                        }
-                       fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg);
+                       fl6->flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg);
                        break;
 
                case IPV6_2292HOPOPTS:
                case IPV6_HOPOPTS:
-                        if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+                       if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
                                err = -EINVAL;
                                goto exit_f;
                        }
@@ -573,7 +708,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
                                err = -EINVAL;
                                goto exit_f;
                        }
-                       if (!capable(CAP_NET_RAW)) {
+                       if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
                                err = -EPERM;
                                goto exit_f;
                        }
@@ -582,7 +717,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
                        break;
 
                case IPV6_2292DSTOPTS:
-                        if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+                       if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
                                err = -EINVAL;
                                goto exit_f;
                        }
@@ -593,7 +728,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
                                err = -EINVAL;
                                goto exit_f;
                        }
-                       if (!capable(CAP_NET_RAW)) {
+                       if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
                                err = -EPERM;
                                goto exit_f;
                        }
@@ -618,7 +753,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
                                err = -EINVAL;
                                goto exit_f;
                        }
-                       if (!capable(CAP_NET_RAW)) {
+                       if (!ns_capable(net->user_ns, CAP_NET_RAW)) {
                                err = -EPERM;
                                goto exit_f;
                        }
@@ -633,24 +768,31 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 
                case IPV6_2292RTHDR:
                case IPV6_RTHDR:
-                        if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
+                       if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
                                err = -EINVAL;
                                goto exit_f;
                        }
 
                        rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
 
-                       /*
-                        *      TYPE 0
-                        */
-                       if (rthdr->type) {
+                       switch (rthdr->type) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+                       case IPV6_SRCRT_TYPE_2:
+                               if (rthdr->hdrlen != 2 ||
+                                   rthdr->segments_left != 1) {
+                                       err = -EINVAL;
+                                       goto exit_f;
+                               }
+                               break;
+#endif
+                       default:
                                err = -EINVAL;
                                goto exit_f;
                        }
 
                        len = ((rthdr->hdrlen + 1) << 3);
 
-                        if (cmsg->cmsg_len < CMSG_LEN(len)) {
+                       if (cmsg->cmsg_len < CMSG_LEN(len)) {
                                err = -EINVAL;
                                goto exit_f;
                        }
@@ -683,6 +825,11 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
                        }
 
                        *hlimit = *(int *)CMSG_DATA(cmsg);
+                       if (*hlimit < -1 || *hlimit > 0xff) {
+                               err = -EINVAL;
+                               goto exit_f;
+                       }
+
                        break;
 
                case IPV6_TCLASS:
@@ -690,27 +837,45 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
                        int tc;
 
                        err = -EINVAL;
-                       if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+                       if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
                                goto exit_f;
-                       }
 
                        tc = *(int *)CMSG_DATA(cmsg);
-                       if (tc < 0 || tc > 0xff)
+                       if (tc < -1 || tc > 0xff)
                                goto exit_f;
 
                        err = 0;
                        *tclass = tc;
 
+                       break;
+                   }
+
+               case IPV6_DONTFRAG:
+                   {
+                       int df;
+
+                       err = -EINVAL;
+                       if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+                               goto exit_f;
+
+                       df = *(int *)CMSG_DATA(cmsg);
+                       if (df < 0 || df > 1)
+                               goto exit_f;
+
+                       err = 0;
+                       *dontfrag = df;
+
                        break;
                    }
                default:
                        LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",
-                                      cmsg->cmsg_type);
+                                      cmsg->cmsg_type);
                        err = -EINVAL;
-                       break;
-               };
+                       goto exit_f;
+               }
        }
 
 exit_f:
        return err;
 }
+EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);