[IPV6]: Added GSO support for TCPv6
[linux-2.6.git] / net / ipv6 / ipv6_sockglue.c
index 7516b88..25f8bf8 100644 (file)
@@ -26,6 +26,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/capability.h>
 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 
 #include <asm/uaccess.h>
 
-DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
+DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
+
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
+{
+       struct sk_buff *segs = ERR_PTR(-EINVAL);
+       struct ipv6hdr *ipv6h;
+       struct inet6_protocol *ops;
+       int proto;
+
+       if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+               goto out;
+
+       ipv6h = skb->nh.ipv6h;
+       proto = ipv6h->nexthdr;
+       __skb_pull(skb, sizeof(*ipv6h));
+
+       rcu_read_lock();
+       for (;;) {
+               struct ipv6_opt_hdr *opth;
+               int len;
+
+               if (proto != NEXTHDR_HOP) {
+                       ops = rcu_dereference(inet6_protos[proto]);
+
+                       if (unlikely(!ops))
+                               goto unlock;
+
+                       if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+                               break;
+               }
+
+               if (unlikely(!pskb_may_pull(skb, 8)))
+                       goto unlock;
+
+               opth = (void *)skb->data;
+               len = opth->hdrlen * 8 + 8;
+
+               if (unlikely(!pskb_may_pull(skb, len)))
+                       goto unlock;
+
+               proto = opth->nexthdr;
+               __skb_pull(skb, len);
+       }
+
+       skb->h.raw = skb->data;
+       if (likely(ops->gso_segment))
+               segs = ops->gso_segment(skb, features);
+
+unlock:
+       rcu_read_unlock();
+
+       if (unlikely(IS_ERR(segs)))
+               goto out;
+
+       for (skb = segs; skb; skb = skb->next) {
+               ipv6h = skb->nh.ipv6h;
+               ipv6h->payload_len = htons(skb->len - skb->mac_len);
+       }
+
+out:
+       return segs;
+}
 
 static struct packet_type ipv6_packet_type = {
        .type = __constant_htons(ETH_P_IPV6), 
        .func = ipv6_rcv,
+       .gso_segment = ipv6_gso_segment,
 };
 
 struct ip6_ra_chain *ip6_ra_chain;
@@ -80,8 +143,7 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
                if (ra->sk == sk) {
                        if (sel>=0) {
                                write_unlock_bh(&ip6_ra_lock);
-                               if (new_ra)
-                                       kfree(new_ra);
+                               kfree(new_ra);
                                return -EADDRINUSE;
                        }
 
@@ -109,19 +171,13 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
        return 0;
 }
 
-int ipv6_setsockopt(struct sock *sk, int level, int optname,
+static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
        int val, valbool;
        int retv = -ENOPROTOOPT;
 
-       if (level == SOL_IP && sk->sk_type != SOCK_RAW)
-               return udp_prot.setsockopt(sk, level, optname, optval, optlen);
-
-       if(level!=SOL_IPV6)
-               goto out;
-
        if (optval == NULL)
                val=0;
        else if (get_user(val, (int __user *) optval))
@@ -164,17 +220,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
                        sk_refcnt_debug_dec(sk);
 
                        if (sk->sk_protocol == IPPROTO_TCP) {
-                               struct tcp_sock *tp = tcp_sk(sk);
+                               struct inet_connection_sock *icsk = inet_csk(sk);
 
                                local_bh_disable();
                                sock_prot_dec_use(sk->sk_prot);
                                sock_prot_inc_use(&tcp_prot);
                                local_bh_enable();
                                sk->sk_prot = &tcp_prot;
-                               tp->af_specific = &ipv4_specific;
+                               icsk->icsk_af_ops = &ipv4_specific;
                                sk->sk_socket->ops = &inet_stream_ops;
                                sk->sk_family = PF_INET;
-                               tcp_sync_mss(sk, tp->pmtu_cookie);
+                               tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
                        } else {
                                local_bh_disable();
                                sock_prot_dec_use(sk->sk_prot);
@@ -210,39 +266,140 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
                retv = 0;
                break;
 
-       case IPV6_PKTINFO:
+       case IPV6_RECVPKTINFO:
                np->rxopt.bits.rxinfo = valbool;
                retv = 0;
                break;
+               
+       case IPV6_2292PKTINFO:
+               np->rxopt.bits.rxoinfo = valbool;
+               retv = 0;
+               break;
 
-       case IPV6_HOPLIMIT:
+       case IPV6_RECVHOPLIMIT:
                np->rxopt.bits.rxhlim = valbool;
                retv = 0;
                break;
 
-       case IPV6_RTHDR:
+       case IPV6_2292HOPLIMIT:
+               np->rxopt.bits.rxohlim = valbool;
+               retv = 0;
+               break;
+
+       case IPV6_RECVRTHDR:
                if (val < 0 || val > 2)
                        goto e_inval;
                np->rxopt.bits.srcrt = val;
                retv = 0;
                break;
 
-       case IPV6_HOPOPTS:
+       case IPV6_2292RTHDR:
+               if (val < 0 || val > 2)
+                       goto e_inval;
+               np->rxopt.bits.osrcrt = val;
+               retv = 0;
+               break;
+
+       case IPV6_RECVHOPOPTS:
                np->rxopt.bits.hopopts = valbool;
                retv = 0;
                break;
 
-       case IPV6_DSTOPTS:
+       case IPV6_2292HOPOPTS:
+               np->rxopt.bits.ohopopts = valbool;
+               retv = 0;
+               break;
+
+       case IPV6_RECVDSTOPTS:
                np->rxopt.bits.dstopts = valbool;
                retv = 0;
                break;
 
+       case IPV6_2292DSTOPTS:
+               np->rxopt.bits.odstopts = valbool;
+               retv = 0;
+               break;
+
+       case IPV6_TCLASS:
+               if (val < 0 || val > 0xff)
+                       goto e_inval;
+               np->tclass = val;
+               retv = 0;
+               break;
+               
+       case IPV6_RECVTCLASS:
+               np->rxopt.bits.rxtclass = valbool;
+               retv = 0;
+               break;
+
        case IPV6_FLOWINFO:
                np->rxopt.bits.rxflow = valbool;
                retv = 0;
                break;
 
-       case IPV6_PKTOPTIONS:
+       case IPV6_HOPOPTS:
+       case IPV6_RTHDRDSTOPTS:
+       case IPV6_RTHDR:
+       case IPV6_DSTOPTS:
+       {
+               struct ipv6_txoptions *opt;
+               if (optlen == 0)
+                       optval = NULL;
+
+               /* hop-by-hop / destination options are privileged option */
+               retv = -EPERM;
+               if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
+                       break;
+
+               retv = -EINVAL;
+               if (optlen & 0x7 || optlen > 8 * 255)
+                       break;
+
+               opt = ipv6_renew_options(sk, np->opt, optname,
+                                        (struct ipv6_opt_hdr __user *)optval,
+                                        optlen);
+               if (IS_ERR(opt)) {
+                       retv = PTR_ERR(opt);
+                       break;
+               }
+
+               /* routing header option needs extra check */
+               if (optname == IPV6_RTHDR && opt->srcrt) {
+                       struct ipv6_rt_hdr *rthdr = opt->srcrt;
+                       if (rthdr->type)
+                               goto sticky_done;
+                       if ((rthdr->hdrlen & 1) ||
+                           (rthdr->hdrlen >> 1) != rthdr->segments_left)
+                               goto sticky_done;
+               }
+
+               retv = 0;
+               if (inet_sk(sk)->is_icsk) {
+                       if (opt) {
+                               struct inet_connection_sock *icsk = inet_csk(sk);
+                               if (!((1 << sk->sk_state) &
+                                     (TCPF_LISTEN | TCPF_CLOSE))
+                                   && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
+                                       icsk->icsk_ext_hdr_len =
+                                               opt->opt_flen + opt->opt_nflen;
+                                       icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
+                               }
+                       }
+                       opt = xchg(&np->opt, opt);
+                       sk_dst_reset(sk);
+               } else {
+                       write_lock(&sk->sk_dst_lock);
+                       opt = xchg(&np->opt, opt);
+                       write_unlock(&sk->sk_dst_lock);
+                       sk_dst_reset(sk);
+               }
+sticky_done:
+               if (opt)
+                       sock_kfree_s(sk, opt, opt->tot_len);
+               break;
+       }
+
+       case IPV6_2292PKTOPTIONS:
        {
                struct ipv6_txoptions *opt = NULL;
                struct msghdr msg;
@@ -276,19 +433,20 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
                msg.msg_controllen = optlen;
                msg.msg_control = (void*)(opt+1);
 
-               retv = datagram_send_ctl(&msg, &fl, opt, &junk);
+               retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk);
                if (retv)
                        goto done;
 update:
                retv = 0;
-               if (sk->sk_type == SOCK_STREAM) {
+               if (inet_sk(sk)->is_icsk) {
                        if (opt) {
-                               struct tcp_sock *tp = tcp_sk(sk);
+                               struct inet_connection_sock *icsk = inet_csk(sk);
                                if (!((1 << sk->sk_state) &
                                      (TCPF_LISTEN | TCPF_CLOSE))
                                    && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
-                                       tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
-                                       tcp_sync_mss(sk, tp->pmtu_cookie);
+                                       icsk->icsk_ext_hdr_len =
+                                               opt->opt_flen + opt->opt_nflen;
+                                       icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
                                }
                        }
                        opt = xchg(&np->opt, opt);
@@ -448,7 +606,7 @@ done:
                        retv = -ENOBUFS;
                        break;
                }
-               gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL);
+               gsf = kmalloc(optlen,GFP_KERNEL);
                if (gsf == 0) {
                        retv = -ENOBUFS;
                        break;
@@ -511,17 +669,9 @@ done:
                retv = xfrm_user_policy(sk, optname, optval, optlen);
                break;
 
-#ifdef CONFIG_NETFILTER
-       default:
-               retv = nf_setsockopt(sk, PF_INET6, optname, optval, 
-                                           optlen);
-               break;
-#endif
-
        }
        release_sock(sk);
 
-out:
        return retv;
 
 e_inval:
@@ -529,17 +679,83 @@ e_inval:
        return -EINVAL;
 }
 
-int ipv6_getsockopt(struct sock *sk, int level, int optname,
+int ipv6_setsockopt(struct sock *sk, int level, int optname,
+                   char __user *optval, int optlen)
+{
+       int err;
+
+       if (level == SOL_IP && sk->sk_type != SOCK_RAW)
+               return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+
+       if (level != SOL_IPV6)
+               return -ENOPROTOOPT;
+
+       err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+       /* we need to exclude all possible ENOPROTOOPTs except default case */
+       if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
+                       optname != IPV6_XFRM_POLICY) {
+               lock_sock(sk);
+               err = nf_setsockopt(sk, PF_INET6, optname, optval,
+                               optlen);
+               release_sock(sk);
+       }
+#endif
+       return err;
+}
+
+
+#ifdef CONFIG_COMPAT
+int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
+                          char __user *optval, int optlen)
+{
+       int err;
+
+       if (level == SOL_IP && sk->sk_type != SOCK_RAW) {
+               if (udp_prot.compat_setsockopt != NULL)
+                       return udp_prot.compat_setsockopt(sk, level, optname,
+                                                         optval, optlen);
+               return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+       }
+
+       if (level != SOL_IPV6)
+               return -ENOPROTOOPT;
+
+       err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+       /* we need to exclude all possible ENOPROTOOPTs except default case */
+       if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
+           optname != IPV6_XFRM_POLICY) {
+               lock_sock(sk);
+               err = compat_nf_setsockopt(sk, PF_INET6, optname,
+                                          optval, optlen);
+               release_sock(sk);
+       }
+#endif
+       return err;
+}
+
+EXPORT_SYMBOL(compat_ipv6_setsockopt);
+#endif
+
+static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_opt_hdr *hdr,
+                                 char __user *optval, int len)
+{
+       if (!hdr)
+               return 0;
+       len = min_t(int, len, ipv6_optlen(hdr));
+       if (copy_to_user(optval, hdr, ipv6_optlen(hdr)))
+               return -EFAULT;
+       return len;
+}
+
+static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
        int len;
        int val;
 
-       if (level == SOL_IP && sk->sk_type != SOCK_RAW)
-               return udp_prot.getsockopt(sk, level, optname, optval, optlen);
-       if(level!=SOL_IPV6)
-               return -ENOPROTOOPT;
        if (get_user(len, optlen))
                return -EFAULT;
        switch (optname) {
@@ -567,7 +783,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
                return err;
        }
 
-       case IPV6_PKTOPTIONS:
+       case IPV6_2292PKTOPTIONS:
        {
                struct msghdr msg;
                struct sk_buff *skb;
@@ -601,6 +817,16 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
                                int hlim = np->mcast_hops;
                                put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
                        }
+                       if (np->rxopt.bits.rxoinfo) {
+                               struct in6_pktinfo src_info;
+                               src_info.ipi6_ifindex = np->mcast_oif;
+                               ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
+                               put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
+                       }
+                       if (np->rxopt.bits.rxohlim) {
+                               int hlim = np->mcast_hops;
+                               put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
+                       }
                }
                len -= msg.msg_controllen;
                return put_user(len, optlen);
@@ -625,26 +851,67 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
                val = np->ipv6only;
                break;
 
-       case IPV6_PKTINFO:
+       case IPV6_RECVPKTINFO:
                val = np->rxopt.bits.rxinfo;
                break;
 
-       case IPV6_HOPLIMIT:
+       case IPV6_2292PKTINFO:
+               val = np->rxopt.bits.rxoinfo;
+               break;
+
+       case IPV6_RECVHOPLIMIT:
                val = np->rxopt.bits.rxhlim;
                break;
 
-       case IPV6_RTHDR:
+       case IPV6_2292HOPLIMIT:
+               val = np->rxopt.bits.rxohlim;
+               break;
+
+       case IPV6_RECVRTHDR:
                val = np->rxopt.bits.srcrt;
                break;
 
+       case IPV6_2292RTHDR:
+               val = np->rxopt.bits.osrcrt;
+               break;
+
        case IPV6_HOPOPTS:
+       case IPV6_RTHDRDSTOPTS:
+       case IPV6_RTHDR:
+       case IPV6_DSTOPTS:
+       {
+
+               lock_sock(sk);
+               len = ipv6_getsockopt_sticky(sk, np->opt->hopopt,
+                                            optval, len);
+               release_sock(sk);
+               return put_user(len, optlen);
+       }
+
+       case IPV6_RECVHOPOPTS:
                val = np->rxopt.bits.hopopts;
                break;
 
-       case IPV6_DSTOPTS:
+       case IPV6_2292HOPOPTS:
+               val = np->rxopt.bits.ohopopts;
+               break;
+
+       case IPV6_RECVDSTOPTS:
                val = np->rxopt.bits.dstopts;
                break;
 
+       case IPV6_2292DSTOPTS:
+               val = np->rxopt.bits.odstopts;
+               break;
+
+       case IPV6_TCLASS:
+               val = np->tclass;
+               break;
+
+       case IPV6_RECVTCLASS:
+               val = np->rxopt.bits.rxtclass;
+               break;
+
        case IPV6_FLOWINFO:
                val = np->rxopt.bits.rxflow;
                break;
@@ -678,17 +945,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
                break;
 
        default:
-#ifdef CONFIG_NETFILTER
-               lock_sock(sk);
-               val = nf_getsockopt(sk, PF_INET6, optname, optval, 
-                                   &len);
-               release_sock(sk);
-               if (val >= 0)
-                       val = put_user(len, optlen);
-               return val;
-#else
                return -EINVAL;
-#endif
        }
        len = min_t(unsigned int, sizeof(int), len);
        if(put_user(len, optlen))
@@ -698,6 +955,78 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
        return 0;
 }
 
+int ipv6_getsockopt(struct sock *sk, int level, int optname,
+                   char __user *optval, int __user *optlen)
+{
+       int err;
+
+       if (level == SOL_IP && sk->sk_type != SOCK_RAW)
+               return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+
+       if(level != SOL_IPV6)
+               return -ENOPROTOOPT;
+
+       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+       /* we need to exclude all possible EINVALs except default case */
+       if (err == -EINVAL && optname != IPV6_ADDRFORM &&
+                       optname != MCAST_MSFILTER) {
+               int len;
+
+               if (get_user(len, optlen))
+                       return -EFAULT;
+
+               lock_sock(sk);
+               err = nf_getsockopt(sk, PF_INET6, optname, optval,
+                               &len);
+               release_sock(sk);
+               if (err >= 0)
+                       err = put_user(len, optlen);
+       }
+#endif
+       return err;
+}
+
+#ifdef CONFIG_COMPAT
+int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
+                          char __user *optval, int __user *optlen)
+{
+       int err;
+
+       if (level == SOL_IP && sk->sk_type != SOCK_RAW) {
+               if (udp_prot.compat_getsockopt != NULL)
+                       return udp_prot.compat_getsockopt(sk, level, optname,
+                                                         optval, optlen);
+               return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+       }
+
+       if (level != SOL_IPV6)
+               return -ENOPROTOOPT;
+
+       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+#ifdef CONFIG_NETFILTER
+       /* we need to exclude all possible EINVALs except default case */
+       if (err == -EINVAL && optname != IPV6_ADDRFORM &&
+                       optname != MCAST_MSFILTER) {
+               int len;
+
+               if (get_user(len, optlen))
+                       return -EFAULT;
+
+               lock_sock(sk);
+               err = compat_nf_getsockopt(sk, PF_INET6,
+                                          optname, optval, &len);
+               release_sock(sk);
+               if (err >= 0)
+                       err = put_user(len, optlen);
+       }
+#endif
+       return err;
+}
+
+EXPORT_SYMBOL(compat_ipv6_getsockopt);
+#endif
+
 void __init ipv6_packet_init(void)
 {
        dev_add_pack(&ipv6_packet_type);