net: Fix build with INET disabled.
[linux-2.6.git] net/core/sock.c
index b0ba569..5c5af99 100644
 #include <linux/tcp.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
+#include <linux/user_namespace.h>
+#include <linux/jump_label.h>
+#include <linux/memcontrol.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <linux/net_tstamp.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
+#include <net/cls_cgroup.h>
+#include <net/netprio_cgroup.h>
 
 #include <linux/filter.h>
 
+#include <trace/events/sock.h>
+
 #ifdef CONFIG_INET
 #include <net/tcp.h>
 #endif
 
+static DEFINE_MUTEX(proto_list_mutex);
+static LIST_HEAD(proto_list);
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+       struct proto *proto;
+       int ret = 0;
+
+       mutex_lock(&proto_list_mutex);
+       list_for_each_entry(proto, &proto_list, node) {
+               if (proto->init_cgroup) {
+                       ret = proto->init_cgroup(cgrp, ss);
+                       if (ret)
+                               goto out;
+               }
+       }
+
+       mutex_unlock(&proto_list_mutex);
+       return ret;
+out:
+       list_for_each_entry_continue_reverse(proto, &proto_list, node)
+               if (proto->destroy_cgroup)
+                       proto->destroy_cgroup(cgrp, ss);
+       mutex_unlock(&proto_list_mutex);
+       return ret;
+}
+
+void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+       struct proto *proto;
+
+       mutex_lock(&proto_list_mutex);
+       list_for_each_entry_reverse(proto, &proto_list, node)
+               if (proto->destroy_cgroup)
+                       proto->destroy_cgroup(cgrp, ss);
+       mutex_unlock(&proto_list_mutex);
+}
+#endif
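
For context, a protocol opts into these hooks through two struct proto
callbacks of this era; a minimal sketch (my_init_cgroup/my_destroy_cgroup
are hypothetical handlers -- TCP's real ones live in net/ipv4/tcp_memcontrol.c):

    struct proto my_proto = {
            .name           = "MYPROTO",
    #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
            .init_cgroup    = my_init_cgroup,    /* run by mem_cgroup_sockets_init() */
            .destroy_cgroup = my_destroy_cgroup, /* run by mem_cgroup_sockets_destroy() */
    #endif
            /* ... */
    };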
+
 /*
  * Each address family might have different locking rules, so we have
  * one slock key per address family:
 static struct lock_class_key af_family_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
 
+struct jump_label_key memcg_socket_limit_enabled;
+EXPORT_SYMBOL(memcg_socket_limit_enabled);
+
 /*
  * Make lock validator output more readable. (we pre-construct these
  * strings build-time, so that runtime initialization of socket
  * locks is fast):
  */
-static const char *af_family_key_strings[AF_MAX+1] = {
+static const char *const af_family_key_strings[AF_MAX+1] = {
   "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
   "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
   "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
@@ -155,10 +205,10 @@ static const char *af_family_key_strings[AF_MAX+1] = {
   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
   "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
-  "sk_lock-AF_IEEE802154",
-  "sk_lock-AF_MAX"
+  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
+  "sk_lock-AF_NFC"   , "sk_lock-AF_MAX"
 };
-static const char *af_family_slock_key_strings[AF_MAX+1] = {
+static const char *const af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
@@ -171,10 +221,10 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
   "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
-  "slock-AF_IEEE802154",
-  "slock-AF_MAX"
+  "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
+  "slock-AF_NFC"   , "slock-AF_MAX"
 };
-static const char *af_family_clock_key_strings[AF_MAX+1] = {
+static const char *const af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
   "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
   "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
@@ -187,8 +237,8 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
   "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
-  "clock-AF_IEEE802154",
-  "clock-AF_MAX"
+  "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
+  "clock-AF_NFC"   , "clock-AF_MAX"
 };
 
 /*
@@ -203,7 +253,7 @@ static struct lock_class_key af_callback_keys[AF_MAX];
  * not depend upon such differences.
  */
 #define _SK_MEM_PACKETS                256
-#define _SK_MEM_OVERHEAD       (sizeof(struct sk_buff) + 256)
+#define _SK_MEM_OVERHEAD       SKB_TRUESIZE(256)
 #define SK_WMEM_MAX            (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 #define SK_RMEM_MAX            (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 
@@ -213,10 +263,21 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 
-/* Maximal space eaten by iovec or ancilliary data plus some space */
+/* Maximal space eaten by iovec or ancillary data plus some space */
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+#if defined(CONFIG_CGROUPS)
+#if !defined(CONFIG_NET_CLS_CGROUP)
+int net_cls_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_cls_subsys_id);
+#endif
+#if !defined(CONFIG_NETPRIO_CGROUP)
+int net_prio_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_prio_subsys_id);
+#endif
+#endif
+
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
        struct timeval tv;
@@ -260,39 +321,38 @@ static void sock_warn_obsolete_bsdism(const char *name)
        }
 }
 
-static void sock_disable_timestamp(struct sock *sk, int flag)
+#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
+
+static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
 {
-       if (sock_flag(sk, flag)) {
-               sock_reset_flag(sk, flag);
-               if (!sock_flag(sk, SOCK_TIMESTAMP) &&
-                   !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
+       if (sk->sk_flags & flags) {
+               sk->sk_flags &= ~flags;
+               if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
                        net_disable_timestamp();
-               }
        }
 }
 
 
 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
-       int err = 0;
+       int err;
        int skb_len;
+       unsigned long flags;
+       struct sk_buff_head *list = &sk->sk_receive_queue;
 
-       /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
-          number of warnings when compiling with -W --ANK
-        */
-       if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-           (unsigned)sk->sk_rcvbuf) {
-               err = -ENOMEM;
-               goto out;
+       if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
+               atomic_inc(&sk->sk_drops);
+               trace_sock_rcvqueue_full(sk, skb);
+               return -ENOMEM;
        }
 
        err = sk_filter(sk, skb);
        if (err)
-               goto out;
+               return err;
 
        if (!sk_rmem_schedule(sk, skb->truesize)) {
-               err = -ENOBUFS;
-               goto out;
+               atomic_inc(&sk->sk_drops);
+               return -ENOBUFS;
        }
 
        skb->dev = NULL;
@@ -305,12 +365,19 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
         */
        skb_len = skb->len;
 
-       skb_queue_tail(&sk->sk_receive_queue, skb);
+       /* we escape from the RCU protected region, make sure we don't
+        * leak a non-refcounted dst
+        */
+       skb_dst_force(skb);
+
+       spin_lock_irqsave(&list->lock, flags);
+       skb->dropcount = atomic_read(&sk->sk_drops);
+       __skb_queue_tail(list, skb);
+       spin_unlock_irqrestore(&list->lock, flags);
 
        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_data_ready(sk, skb_len);
-out:
-       return err;
+       return 0;
 }
 EXPORT_SYMBOL(sock_queue_rcv_skb);
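
Note that sock_queue_rcv_skb() does not consume the skb on failure; a
hypothetical protocol delivery path would free it itself:

    /* Hypothetical caller: on -ENOMEM/-ENOBUFS (sk_drops already bumped
     * above) the skb must still be freed by the caller.
     */
    static int myproto_deliver(struct sock *sk, struct sk_buff *skb)
    {
            int err = sock_queue_rcv_skb(sk, skb);

            if (err < 0)
                    kfree_skb(skb);
            return err;
    }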
 
@@ -323,6 +390,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 
        skb->dev = NULL;
 
+       if (sk_rcvqueues_full(sk, skb)) {
+               atomic_inc(&sk->sk_drops);
+               goto discard_and_relse;
+       }
        if (nested)
                bh_lock_sock_nested(sk);
        else
@@ -336,8 +407,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
                rc = sk_backlog_rcv(sk, skb);
 
                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-       } else
-               sk_add_backlog(sk, skb);
+       } else if (sk_add_backlog(sk, skb)) {
+               bh_unlock_sock(sk);
+               atomic_inc(&sk->sk_drops);
+               goto discard_and_relse;
+       }
+
        bh_unlock_sock(sk);
 out:
        sock_put(sk);
@@ -348,12 +423,19 @@ discard_and_relse:
 }
 EXPORT_SYMBOL(sk_receive_skb);
 
+void sk_reset_txq(struct sock *sk)
+{
+       sk_tx_queue_clear(sk);
+}
+EXPORT_SYMBOL(sk_reset_txq);
+
 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 {
-       struct dst_entry *dst = sk->sk_dst_cache;
+       struct dst_entry *dst = __sk_dst_get(sk);
 
        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
-               sk->sk_dst_cache = NULL;
+               sk_tx_queue_clear(sk);
+               RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
                dst_release(dst);
                return NULL;
        }
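
Callers revalidate the cached route before each transmit; a simplified
sketch of the usual pattern (as in the IPv4 output path, details elided):

    struct dst_entry *dst = __sk_dst_check(sk, 0);

    if (dst == NULL) {
            /* Route was obsoleted; sk_tx_queue was cleared above.
             * Re-resolve the route and re-attach it with __sk_dst_set().
             */
    }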
@@ -406,17 +488,18 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
        if (copy_from_user(devname, optval, optlen))
                goto out;
 
-       if (devname[0] == '\0') {
-               index = 0;
-       } else {
-               struct net_device *dev = dev_get_by_name(net, devname);
+       index = 0;
+       if (devname[0] != '\0') {
+               struct net_device *dev;
 
+               rcu_read_lock();
+               dev = dev_get_by_name_rcu(net, devname);
+               if (dev)
+                       index = dev->ifindex;
+               rcu_read_unlock();
                ret = -ENODEV;
                if (!dev)
                        goto out;
-
-               index = dev->ifindex;
-               dev_put(dev);
        }
 
        lock_sock(sk);
@@ -446,7 +529,7 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
  */
 
 int sock_setsockopt(struct socket *sock, int level, int optname,
-                   char __user *optval, int optlen)
+                   char __user *optval, unsigned int optlen)
 {
        struct sock *sk = sock->sk;
        int val;
@@ -482,6 +565,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
                sk->sk_reuse = valbool;
                break;
        case SO_TYPE:
+       case SO_PROTOCOL:
+       case SO_DOMAIN:
        case SO_ERROR:
                ret = -ENOPROTOOPT;
                break;
@@ -631,7 +716,7 @@ set_rcvbuf:
 
        case SO_TIMESTAMPING:
                if (val & ~SOF_TIMESTAMPING_MASK) {
-                       ret = EINVAL;
+                       ret = -EINVAL;
                        break;
                }
                sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
@@ -645,7 +730,7 @@ set_rcvbuf:
                                              SOCK_TIMESTAMPING_RX_SOFTWARE);
                else
                        sock_disable_timestamp(sk,
-                                              SOCK_TIMESTAMPING_RX_SOFTWARE);
+                                              (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
                sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
                                  val & SOF_TIMESTAMPING_SOFTWARE);
                sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
@@ -700,6 +785,14 @@ set_rcvbuf:
 
                /* We implement the SO_SNDLOWAT etc to
                   not be settable (1003.1g 5.3) */
+       case SO_RXQ_OVFL:
+               sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
+               break;
+
+       case SO_WIFI_STATUS:
+               sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
+               break;
+
        default:
                ret = -ENOPROTOOPT;
                break;
@@ -710,6 +803,20 @@ set_rcvbuf:
 EXPORT_SYMBOL(sock_setsockopt);
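
Once SO_RXQ_OVFL is enabled, the dropcount snapshot taken under the receive
queue lock reaches userspace as ancillary data; a rough userspace sketch
(error handling elided):

    int on = 1;
    struct msghdr msg;   /* iov and control buffer set up elsewhere */
    struct cmsghdr *cmsg;

    setsockopt(fd, SOL_SOCKET, SO_RXQ_OVFL, &on, sizeof(on));
    recvmsg(fd, &msg, 0);
    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
            if (cmsg->cmsg_level == SOL_SOCKET &&
                cmsg->cmsg_type == SO_RXQ_OVFL) {
                    uint32_t dropped;

                    memcpy(&dropped, CMSG_DATA(cmsg), sizeof(dropped));
                    /* dropped == sk_drops at enqueue time */
            }
    }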
 
 
+void cred_to_ucred(struct pid *pid, const struct cred *cred,
+                  struct ucred *ucred)
+{
+       ucred->pid = pid_vnr(pid);
+       ucred->uid = ucred->gid = -1;
+       if (cred) {
+               struct user_namespace *current_ns = current_user_ns();
+
+               ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
+               ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
+       }
+}
+EXPORT_SYMBOL_GPL(cred_to_ucred);
+
 int sock_getsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int __user *optlen)
 {
@@ -721,7 +828,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                struct timeval tm;
        } v;
 
-       unsigned int lv = sizeof(int);
+       int lv = sizeof(int);
        int len;
 
        if (get_user(len, optlen))
@@ -764,6 +871,14 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                v.val = sk->sk_type;
                break;
 
+       case SO_PROTOCOL:
+               v.val = sk->sk_protocol;
+               break;
+
+       case SO_DOMAIN:
+               v.val = sk->sk_family;
+               break;
+
        case SO_ERROR:
                v.val = -sock_error(sk);
                if (v.val == 0)
@@ -854,11 +969,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                break;
 
        case SO_PEERCRED:
-               if (len > sizeof(sk->sk_peercred))
-                       len = sizeof(sk->sk_peercred);
-               if (copy_to_user(optval, &sk->sk_peercred, len))
+       {
+               struct ucred peercred;
+               if (len > sizeof(peercred))
+                       len = sizeof(peercred);
+               cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+               if (copy_to_user(optval, &peercred, len))
                        return -EFAULT;
                goto lenout;
+       }
 
        case SO_PEERNAME:
        {
@@ -891,6 +1010,14 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                v.val = sk->sk_mark;
                break;
 
+       case SO_RXQ_OVFL:
+               v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
+               break;
+
+       case SO_WIFI_STATUS:
+               v.val = !!sock_flag(sk, SOCK_WIFI_STATUS);
+               break;
+
        default:
                return -ENOPROTOOPT;
        }
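
Both new read-only options report what the socket was created with; from
userspace (sketch, error handling elided):

    int proto, domain;
    socklen_t len = sizeof(int);

    getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &proto, &len);  /* e.g. IPPROTO_TCP */
    len = sizeof(int);
    getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &domain, &len);   /* e.g. AF_INET */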
@@ -919,19 +1046,57 @@ static inline void sock_lock_init(struct sock *sk)
                        af_family_keys + sk->sk_family);
 }
 
+/*
+ * Copy all fields from osk to nsk, but nsk->sk_refcnt must not change yet,
+ * even temporarily, because of RCU lookups. sk_node should also be left as-is.
+ * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end.
+ */
 static void sock_copy(struct sock *nsk, const struct sock *osk)
 {
 #ifdef CONFIG_SECURITY_NETWORK
        void *sptr = nsk->sk_security;
 #endif
+       memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
+
+       memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
+              osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
 
-       memcpy(nsk, osk, osk->sk_prot->obj_size);
 #ifdef CONFIG_SECURITY_NETWORK
        nsk->sk_security = sptr;
        security_sk_clone(osk, nsk);
 #endif
 }
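
The two memcpy() calls straddle a window that concurrent RCU lookups may
touch; schematically (simplified -- the real markers are zero-sized fields
in struct sock_common, aliased as sk_dontcopy_begin/sk_dontcopy_end):

    [ start .. sk_dontcopy_begin )    copied by the first memcpy()
    [ sk_dontcopy_begin .. _end )     skipped: sk_node/nulls node, sk_refcnt
    [ sk_dontcopy_end .. obj_size )   copied by the second memcpy()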
 
+/*
+ * Caches using SLAB_DESTROY_BY_RCU should leave the .next pointer of nulls
+ * nodes unmodified. Special care is taken when initializing the object to zero.
+ */
+static inline void sk_prot_clear_nulls(struct sock *sk, int size)
+{
+       if (offsetof(struct sock, sk_node.next) != 0)
+               memset(sk, 0, offsetof(struct sock, sk_node.next));
+       memset(&sk->sk_node.pprev, 0,
+              size - offsetof(struct sock, sk_node.pprev));
+}
+
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
+{
+       unsigned long nulls1, nulls2;
+
+       nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
+       nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
+       if (nulls1 > nulls2)
+               swap(nulls1, nulls2);
+
+       if (nulls1 != 0)
+               memset((char *)sk, 0, nulls1);
+       memset((char *)sk + nulls1 + sizeof(void *), 0,
+              nulls2 - nulls1 - sizeof(void *));
+       memset((char *)sk + nulls2 + sizeof(void *), 0,
+              size - nulls2 - sizeof(void *));
+}
+EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
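
A proto whose sockets additionally sit on a portaddr nulls list (UDP-style)
hooks this in through the clear_sk callback, so that a __GFP_ZERO allocation
preserves both nulls pointers; an illustrative fragment (UDP of this era does
the equivalent):

    struct proto udp_like_proto = {
            .name     = "UDP",
            .obj_size = sizeof(struct udp_sock),
            .clear_sk = sk_prot_clear_portaddr_nulls,
            /* ... */
    };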
+
 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
                int family)
 {
@@ -939,9 +1104,17 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
        struct kmem_cache *slab;
 
        slab = prot->slab;
-       if (slab != NULL)
-               sk = kmem_cache_alloc(slab, priority);
-       else
+       if (slab != NULL) {
+               sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
+               if (!sk)
+                       return sk;
+               if (priority & __GFP_ZERO) {
+                       if (prot->clear_sk)
+                               prot->clear_sk(sk, prot->obj_size);
+                       else
+                               sk_prot_clear_nulls(sk, prot->obj_size);
+               }
+       } else
                sk = kmalloc(prot->obj_size, priority);
 
        if (sk != NULL) {
@@ -952,6 +1125,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 
                if (!try_module_get(prot->owner))
                        goto out_free_sec;
+               sk_tx_queue_clear(sk);
        }
 
        return sk;
@@ -982,6 +1156,32 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
        module_put(owner);
 }
 
+#ifdef CONFIG_CGROUPS
+void sock_update_classid(struct sock *sk)
+{
+       u32 classid;
+
+       rcu_read_lock();  /* doing current task, which cannot vanish. */
+       classid = task_cls_classid(current);
+       rcu_read_unlock();
+       if (classid && classid != sk->sk_classid)
+               sk->sk_classid = classid;
+}
+EXPORT_SYMBOL(sock_update_classid);
+
+void sock_update_netprioidx(struct sock *sk)
+{
+       struct cgroup_netprio_state *state;
+       if (in_interrupt())
+               return;
+       rcu_read_lock();
+       state = task_netprio_state(current);
+       sk->sk_cgrp_prioidx = state ? state->prioidx : 0;
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(sock_update_netprioidx);
+#endif
+
 /**
  *     sk_alloc - All socket objects are allocated here
  *     @net: the applicable net namespace
@@ -1004,6 +1204,10 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                sk->sk_prot = sk->sk_prot_creator = prot;
                sock_lock_init(sk);
                sock_net_set(sk, get_net(net));
+               atomic_set(&sk->sk_wmem_alloc, 1);
+
+               sock_update_classid(sk);
+               sock_update_netprioidx(sk);
        }
 
        return sk;
@@ -1017,19 +1221,22 @@ static void __sk_free(struct sock *sk)
        if (sk->sk_destruct)
                sk->sk_destruct(sk);
 
-       filter = rcu_dereference(sk->sk_filter);
+       filter = rcu_dereference_check(sk->sk_filter,
+                                      atomic_read(&sk->sk_wmem_alloc) == 0);
        if (filter) {
                sk_filter_uncharge(sk, filter);
-               rcu_assign_pointer(sk->sk_filter, NULL);
+               RCU_INIT_POINTER(sk->sk_filter, NULL);
        }
 
-       sock_disable_timestamp(sk, SOCK_TIMESTAMP);
-       sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
+       sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
 
        if (atomic_read(&sk->sk_omem_alloc))
                printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
                       __func__, atomic_read(&sk->sk_omem_alloc));
 
+       if (sk->sk_peer_cred)
+               put_cred(sk->sk_peer_cred);
+       put_pid(sk->sk_peer_pid);
        put_net(sock_net(sk));
        sk_prot_free(sk->sk_prot_creator, sk);
 }
@@ -1037,7 +1244,7 @@ static void __sk_free(struct sock *sk)
 void sk_free(struct sock *sk)
 {
        /*
-        * We substract one from sk_wmem_alloc and can know if
+        * We subtract one from sk_wmem_alloc and can know if
         * some packets are still in some tx queue.
         * If not null, sock_wfree() will call __sk_free(sk) later
         */
@@ -1047,10 +1254,10 @@ void sk_free(struct sock *sk)
 EXPORT_SYMBOL(sk_free);
 
 /*
- * Last sock_put should drop referrence to sk->sk_net. It has already
- * been dropped in sk_change_net. Taking referrence to stopping namespace
+ * Last sock_put should drop reference to sk->sk_net. It has already
+ * been dropped in sk_change_net. Taking reference to stopping namespace
  * is not an option.
- * Take referrence to a socket to remove it from hash _alive_ and after that
+ * Take reference to a socket to remove it from hash _alive_ and after that
  * destroy it in the context of init_net.
  */
 void sk_release_kernel(struct sock *sk)
@@ -1066,7 +1273,20 @@ void sk_release_kernel(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_release_kernel);
 
-struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
+static void sk_update_clone(const struct sock *sk, struct sock *newsk)
+{
+       if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+               sock_update_memcg(newsk);
+}
+
+/**
+ *     sk_clone_lock - clone a socket, and lock its clone
+ *     @sk: the socket to clone
+ *     @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *
+ *     Caller must unlock socket even in error path (bh_unlock_sock(newsk))
+ */
+struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 {
        struct sock *newsk;
 
@@ -1082,6 +1302,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                sock_lock_init(newsk);
                bh_lock_sock(newsk);
                newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
+               newsk->sk_backlog.len = 0;
 
                atomic_set(&newsk->sk_rmem_alloc, 0);
                /*
@@ -1095,7 +1316,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                skb_queue_head_init(&newsk->sk_async_wait_queue);
 #endif
 
-               rwlock_init(&newsk->sk_dst_lock);
+               spin_lock_init(&newsk->sk_dst_lock);
                rwlock_init(&newsk->sk_callback_lock);
                lockdep_set_class_and_name(&newsk->sk_callback_lock,
                                af_callback_keys + newsk->sk_family,
@@ -1110,7 +1331,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                sock_reset_flag(newsk, SOCK_DONE);
                skb_queue_head_init(&newsk->sk_error_queue);
 
-               filter = newsk->sk_filter;
+               filter = rcu_dereference_protected(newsk->sk_filter, 1);
                if (filter != NULL)
                        sk_filter_charge(newsk, filter);
 
@@ -1118,6 +1339,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                        /* It is still raw copy of parent, so invalidate
                         * destructor and make plain sk_free() */
                        newsk->sk_destruct = NULL;
+                       bh_unlock_sock(newsk);
                        sk_free(newsk);
                        newsk = NULL;
                        goto out;
@@ -1125,6 +1347,11 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 
                newsk->sk_err      = 0;
                newsk->sk_priority = 0;
+               /*
+                * Before updating sk_refcnt, we must commit prior changes to memory
+                * (Documentation/RCU/rculist_nulls.txt for details)
+                */
+               smp_wmb();
                atomic_set(&newsk->sk_refcnt, 2);
 
                /*
@@ -1140,15 +1367,20 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                 */
                sk_refcnt_debug_inc(newsk);
                sk_set_socket(newsk, NULL);
-               newsk->sk_sleep  = NULL;
+               newsk->sk_wq = NULL;
+
+               sk_update_clone(sk, newsk);
 
                if (newsk->sk_prot->sockets_allocated)
-                       percpu_counter_inc(newsk->sk_prot->sockets_allocated);
+                       sk_sockets_allocated_inc(newsk);
+
+               if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+                       net_enable_timestamp();
        }
 out:
        return newsk;
 }
-EXPORT_SYMBOL_GPL(sk_clone);
+EXPORT_SYMBOL_GPL(sk_clone_lock);
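
As the kernel-doc above warns, the clone comes back bh-locked; a hedged
sketch of a caller (inet_csk_clone_lock-style, details elided):

    struct sock *newsk = sk_clone_lock(sk, GFP_ATOMIC);

    if (newsk != NULL) {
            /* ... protocol-specific setup of the clone ... */
            bh_unlock_sock(newsk);  /* required even if that setup failed */
    }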
 
 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 {
@@ -1156,6 +1388,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
        sk->sk_route_caps = dst->dev->features;
        if (sk->sk_route_caps & NETIF_F_GSO)
                sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+       sk->sk_route_caps &= ~sk->sk_route_nocaps;
        if (sk_can_gso(sk)) {
                if (dst->header_len) {
                        sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
@@ -1169,12 +1402,12 @@ EXPORT_SYMBOL_GPL(sk_setup_caps);
 
 void __init sk_init(void)
 {
-       if (num_physpages <= 4096) {
+       if (totalram_pages <= 4096) {
                sysctl_wmem_max = 32767;
                sysctl_rmem_max = 32767;
                sysctl_wmem_default = 32767;
                sysctl_rmem_default = 32767;
-       } else if (num_physpages >= 131072) {
+       } else if (totalram_pages >= 131072) {
                sysctl_wmem_max = 131071;
                sysctl_rmem_max = 131071;
        }
@@ -1191,17 +1424,22 @@ void __init sk_init(void)
 void sock_wfree(struct sk_buff *skb)
 {
        struct sock *sk = skb->sk;
-       int res;
+       unsigned int len = skb->truesize;
 
-       /* In case it might be waiting for more memory. */
-       res = atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc);
-       if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
+       if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+               /*
+                * Keep a reference on sk_wmem_alloc; it will be released
+                * after the sk_write_space() call
+                */
+               atomic_sub(len - 1, &sk->sk_wmem_alloc);
                sk->sk_write_space(sk);
+               len = 1;
+       }
        /*
-        * if sk_wmem_alloc reached 0, we are last user and should
-        * free this sock, as sk_free() call could not do it.
+        * if sk_wmem_alloc reaches 0, we must finish what sk_free()
+        * could not do because of in-flight packets
         */
-       if (res == 0)
+       if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
                __sk_free(sk);
 }
 EXPORT_SYMBOL(sock_wfree);
@@ -1212,9 +1450,10 @@ EXPORT_SYMBOL(sock_wfree);
 void sock_rfree(struct sk_buff *skb)
 {
        struct sock *sk = skb->sk;
+       unsigned int len = skb->truesize;
 
-       atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
-       sk_mem_uncharge(skb->sk, skb->truesize);
+       atomic_sub(len, &sk->sk_rmem_alloc);
+       sk_mem_uncharge(sk, len);
 }
 EXPORT_SYMBOL(sock_rfree);
 
@@ -1223,9 +1462,9 @@ int sock_i_uid(struct sock *sk)
 {
        int uid;
 
-       read_lock(&sk->sk_callback_lock);
+       read_lock_bh(&sk->sk_callback_lock);
        uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
-       read_unlock(&sk->sk_callback_lock);
+       read_unlock_bh(&sk->sk_callback_lock);
        return uid;
 }
 EXPORT_SYMBOL(sock_i_uid);
@@ -1234,9 +1473,9 @@ unsigned long sock_i_ino(struct sock *sk)
 {
        unsigned long ino;
 
-       read_lock(&sk->sk_callback_lock);
+       read_lock_bh(&sk->sk_callback_lock);
        ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
-       read_unlock(&sk->sk_callback_lock);
+       read_unlock_bh(&sk->sk_callback_lock);
        return ino;
 }
 EXPORT_SYMBOL(sock_i_ino);
@@ -1319,7 +1558,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
                if (signal_pending(current))
                        break;
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-               prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
                        break;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
@@ -1328,7 +1567,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
                        break;
                timeo = schedule_timeout(timeo);
        }
-       finish_wait(sk->sk_sleep, &wait);
+       finish_wait(sk_sleep(sk), &wait);
        return timeo;
 }
 
@@ -1375,7 +1614,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
                                skb_shinfo(skb)->nr_frags = npages;
                                for (i = 0; i < npages; i++) {
                                        struct page *page;
-                                       skb_frag_t *frag;
 
                                        page = alloc_pages(sk->sk_allocation, 0);
                                        if (!page) {
@@ -1385,12 +1623,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
                                                goto failure;
                                        }
 
-                                       frag = &skb_shinfo(skb)->frags[i];
-                                       frag->page = page;
-                                       frag->page_offset = 0;
-                                       frag->size = (data_len >= PAGE_SIZE ?
-                                                     PAGE_SIZE :
-                                                     data_len);
+                                       __skb_fill_page_desc(skb, i,
+                                                       page, 0,
+                                                       (data_len >= PAGE_SIZE ?
+                                                        PAGE_SIZE :
+                                                        data_len));
                                        data_len -= PAGE_SIZE;
                                }
 
@@ -1429,6 +1666,8 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
 EXPORT_SYMBOL(sock_alloc_send_skb);
 
 static void __lock_sock(struct sock *sk)
+       __releases(&sk->sk_lock.slock)
+       __acquires(&sk->sk_lock.slock)
 {
        DEFINE_WAIT(wait);
 
@@ -1445,6 +1684,8 @@ static void __lock_sock(struct sock *sk)
 }
 
 static void __release_sock(struct sock *sk)
+       __releases(&sk->sk_lock.slock)
+       __acquires(&sk->sk_lock.slock)
 {
        struct sk_buff *skb = sk->sk_backlog.head;
 
@@ -1455,6 +1696,7 @@ static void __release_sock(struct sock *sk)
                do {
                        struct sk_buff *next = skb->next;
 
+                       WARN_ON_ONCE(skb_dst_is_noref(skb));
                        skb->next = NULL;
                        sk_backlog_rcv(sk, skb);
 
@@ -1471,6 +1713,12 @@ static void __release_sock(struct sock *sk)
 
                bh_lock_sock(sk);
        } while ((skb = sk->sk_backlog.head) != NULL);
+
+       /*
+        * Doing the zeroing here guarantee we can not loop forever
+        * Doing the zeroing here guarantees we cannot loop forever
+        */
+       sk->sk_backlog.len = 0;
 }
 
 /**
@@ -1488,11 +1736,11 @@ int sk_wait_data(struct sock *sk, long *timeo)
        int rc;
        DEFINE_WAIT(wait);
 
-       prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+       prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
        set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
        clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-       finish_wait(sk->sk_sleep, &wait);
+       finish_wait(sk_sleep(sk), &wait);
        return rc;
 }
 EXPORT_SYMBOL(sk_wait_data);
@@ -1511,31 +1759,35 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 {
        struct proto *prot = sk->sk_prot;
        int amt = sk_mem_pages(size);
-       int allocated;
+       long allocated;
+       int parent_status = UNDER_LIMIT;
 
        sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-       allocated = atomic_add_return(amt, prot->memory_allocated);
+
+       allocated = sk_memory_allocated_add(sk, amt, &parent_status);
 
        /* Under limit. */
-       if (allocated <= prot->sysctl_mem[0]) {
-               if (prot->memory_pressure && *prot->memory_pressure)
-                       *prot->memory_pressure = 0;
+       if (parent_status == UNDER_LIMIT &&
+                       allocated <= sk_prot_mem_limits(sk, 0)) {
+               sk_leave_memory_pressure(sk);
                return 1;
        }
 
-       /* Under pressure. */
-       if (allocated > prot->sysctl_mem[1])
-               if (prot->enter_memory_pressure)
-                       prot->enter_memory_pressure(sk);
+       /* Under pressure. (we or our parents) */
+       if ((parent_status > SOFT_LIMIT) ||
+                       allocated > sk_prot_mem_limits(sk, 1))
+               sk_enter_memory_pressure(sk);
 
-       /* Over hard limit. */
-       if (allocated > prot->sysctl_mem[2])
+       /* Over hard limit (we or our parents) */
+       if ((parent_status == OVER_LIMIT) ||
+                       (allocated > sk_prot_mem_limits(sk, 2)))
                goto suppress_allocation;
 
        /* guarantee minimum buffer size under pressure */
        if (kind == SK_MEM_RECV) {
                if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
                        return 1;
        } else { /* SK_MEM_SEND */
                if (sk->sk_type == SOCK_STREAM) {
                        if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
@@ -1545,13 +1797,13 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
                                return 1;
        }
 
-       if (prot->memory_pressure) {
+       if (sk_has_memory_pressure(sk)) {
                int alloc;
 
-               if (!*prot->memory_pressure)
+               if (!sk_under_memory_pressure(sk))
                        return 1;
-               alloc = percpu_counter_read_positive(prot->sockets_allocated);
-               if (prot->sysctl_mem[2] > alloc *
+               alloc = sk_sockets_allocated_read_positive(sk);
+               if (sk_prot_mem_limits(sk, 2) > alloc *
                    sk_mem_pages(sk->sk_wmem_queued +
                                 atomic_read(&sk->sk_rmem_alloc) +
                                 sk->sk_forward_alloc))
@@ -1570,9 +1822,13 @@ suppress_allocation:
                        return 1;
        }
 
+       trace_sock_exceed_buf_limit(sk, prot, allocated);
+
        /* Alas. Undo changes. */
        sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
-       atomic_sub(amt, prot->memory_allocated);
+
+       sk_memory_allocated_sub(sk, amt, parent_status);
+
        return 0;
 }
 EXPORT_SYMBOL(__sk_mem_schedule);
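
A worked example of the charge above, assuming SK_MEM_QUANTUM == PAGE_SIZE
== 4096: charging size = 6000 bytes gives amt = sk_mem_pages(6000) = 2, so
sk_forward_alloc grows by 2 * 4096 = 8192 bytes while the (now
hierarchy-aware) memory_allocated counter grows by 2 pages; the surplus
8192 - 6000 = 2192 bytes remains as forward-alloc credit for later charges.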
@@ -1583,15 +1839,13 @@ EXPORT_SYMBOL(__sk_mem_schedule);
  */
 void __sk_mem_reclaim(struct sock *sk)
 {
-       struct proto *prot = sk->sk_prot;
-
-       atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
-                  prot->memory_allocated);
+       sk_memory_allocated_sub(sk,
+                               sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0);
        sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
-       if (prot->memory_pressure && *prot->memory_pressure &&
-           (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
-               *prot->memory_pressure = 0;
+       if (sk_under_memory_pressure(sk) &&
+           (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
+               sk_leave_memory_pressure(sk);
 }
 EXPORT_SYMBOL(__sk_mem_reclaim);
 
@@ -1660,7 +1914,7 @@ int sock_no_shutdown(struct socket *sock, int how)
 EXPORT_SYMBOL(sock_no_shutdown);
 
 int sock_no_setsockopt(struct socket *sock, int level, int optname,
-                   char __user *optval, int optlen)
+                   char __user *optval, unsigned int optlen)
 {
        return -EOPNOTSUPP;
 }
@@ -1714,41 +1968,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
 
 static void sock_def_wakeup(struct sock *sk)
 {
-       read_lock(&sk->sk_callback_lock);
-       if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-               wake_up_interruptible_all(sk->sk_sleep);
-       read_unlock(&sk->sk_callback_lock);
+       struct socket_wq *wq;
+
+       rcu_read_lock();
+       wq = rcu_dereference(sk->sk_wq);
+       if (wq_has_sleeper(wq))
+               wake_up_interruptible_all(&wq->wait);
+       rcu_read_unlock();
 }
 
 static void sock_def_error_report(struct sock *sk)
 {
-       read_lock(&sk->sk_callback_lock);
-       if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-               wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
+       struct socket_wq *wq;
+
+       rcu_read_lock();
+       wq = rcu_dereference(sk->sk_wq);
+       if (wq_has_sleeper(wq))
+               wake_up_interruptible_poll(&wq->wait, POLLERR);
        sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
-       read_unlock(&sk->sk_callback_lock);
+       rcu_read_unlock();
 }
 
 static void sock_def_readable(struct sock *sk, int len)
 {
-       read_lock(&sk->sk_callback_lock);
-       if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-               wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+       struct socket_wq *wq;
+
+       rcu_read_lock();
+       wq = rcu_dereference(sk->sk_wq);
+       if (wq_has_sleeper(wq))
+               wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
                                                POLLRDNORM | POLLRDBAND);
        sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-       read_unlock(&sk->sk_callback_lock);
+       rcu_read_unlock();
 }
 
 static void sock_def_write_space(struct sock *sk)
 {
-       read_lock(&sk->sk_callback_lock);
+       struct socket_wq *wq;
+
+       rcu_read_lock();
 
        /* Do not wake up a writer until he can make "significant"
         * progress.  --DaveM
         */
        if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
-               if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-                       wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+               wq = rcu_dereference(sk->sk_wq);
+               if (wq_has_sleeper(wq))
+                       wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
                                                POLLWRNORM | POLLWRBAND);
 
                /* Should agree with poll, otherwise some programs break */
@@ -1756,7 +2022,7 @@ static void sock_def_write_space(struct sock *sk)
                        sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
        }
 
-       read_unlock(&sk->sk_callback_lock);
+       rcu_read_unlock();
 }
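
For reference, the wq_has_sleeper() helper these callbacks now rely on is
roughly the following (as in this era's include/net/sock.h); its barrier
pairs with the one in sock_poll_wait():

    static inline int wq_has_sleeper(struct socket_wq *wq)
    {
            /* Paired with the barrier in sock_poll_wait(), so the
             * waitqueue_active() test cannot be reordered before the
             * state change that made the socket ready.
             */
            smp_mb();
            return wq && waitqueue_active(&wq->wait);
    }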
 
 static void sock_def_destruct(struct sock *sk)
@@ -1810,12 +2076,12 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
        if (sock) {
                sk->sk_type     =       sock->type;
-               sk->sk_sleep    =       &sock->wait;
+               sk->sk_wq       =       sock->wq;
                sock->sk        =       sk;
        } else
-               sk->sk_sleep    =       NULL;
+               sk->sk_wq       =       NULL;
 
-       rwlock_init(&sk->sk_dst_lock);
+       spin_lock_init(&sk->sk_dst_lock);
        rwlock_init(&sk->sk_callback_lock);
        lockdep_set_class_and_name(&sk->sk_callback_lock,
                        af_callback_keys + sk->sk_family,
@@ -1830,9 +2096,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        sk->sk_sndmsg_page      =       NULL;
        sk->sk_sndmsg_off       =       0;
 
-       sk->sk_peercred.pid     =       0;
-       sk->sk_peercred.uid     =       -1;
-       sk->sk_peercred.gid     =       -1;
+       sk->sk_peer_pid         =       NULL;
+       sk->sk_peer_cred        =       NULL;
        sk->sk_write_pending    =       0;
        sk->sk_rcvlowat         =       1;
        sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
@@ -1840,8 +2105,12 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
        sk->sk_stamp = ktime_set(-1L, 0);
 
+       /*
+        * Before updating sk_refcnt, we must commit prior changes to memory
+        * (Documentation/RCU/rculist_nulls.txt for details)
+        */
+       smp_wmb();
        atomic_set(&sk->sk_refcnt, 1);
-       atomic_set(&sk->sk_wmem_alloc, 1);
        atomic_set(&sk->sk_drops, 0);
 }
 EXPORT_SYMBOL(sock_init_data);
@@ -1879,6 +2148,39 @@ void release_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(release_sock);
 
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small sections, where the process won't block.
+ * return false if fast path is taken
+ *   sk_lock.slock locked, owned = 0, BH disabled
+ * return true if slow path is taken
+ *   sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+bool lock_sock_fast(struct sock *sk)
+{
+       might_sleep();
+       spin_lock_bh(&sk->sk_lock.slock);
+
+       if (!sk->sk_lock.owned)
+               /*
+                * Note: we return with BH disabled
+                */
+               return false;
+
+       __lock_sock(sk);
+       sk->sk_lock.owned = 1;
+       spin_unlock(&sk->sk_lock.slock);
+       /*
+        * The sk_lock has mutex_lock() semantics here:
+        */
+       mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+       local_bh_enable();
+       return true;
+}
+EXPORT_SYMBOL(lock_sock_fast);
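
The returned bool feeds unlock_sock_fast(); the typical pairing (the UDP
receive path uses this pattern):

    bool slow;

    slow = lock_sock_fast(sk);
    /* ... short, non-blocking critical section ... */
    unlock_sock_fast(sk, slow);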
+
 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
        struct timeval tv;
@@ -1914,16 +2216,15 @@ EXPORT_SYMBOL(sock_get_timestampns);
 void sock_enable_timestamp(struct sock *sk, int flag)
 {
        if (!sock_flag(sk, flag)) {
+               unsigned long previous_flags = sk->sk_flags;
+
                sock_set_flag(sk, flag);
                /*
                 * we just set one of the two flags which require net
                 * time stamping, but time stamping might have been on
                 * already because of the other one
                 */
-               if (!sock_flag(sk,
-                               flag == SOCK_TIMESTAMP ?
-                               SOCK_TIMESTAMPING_RX_SOFTWARE :
-                               SOCK_TIMESTAMP))
+               if (!(previous_flags & SK_FLAGS_TIMESTAMP))
                        net_enable_timestamp();
        }
 }
@@ -1977,7 +2278,7 @@ EXPORT_SYMBOL(sock_common_recvmsg);
  *     Set socket options on an inet socket.
  */
 int sock_common_setsockopt(struct socket *sock, int level, int optname,
-                          char __user *optval, int optlen)
+                          char __user *optval, unsigned int optlen)
 {
        struct sock *sk = sock->sk;
 
@@ -1987,7 +2288,7 @@ EXPORT_SYMBOL(sock_common_setsockopt);
 
 #ifdef CONFIG_COMPAT
 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
-                                 char __user *optval, int optlen)
+                                 char __user *optval, unsigned int optlen)
 {
        struct sock *sk = sock->sk;
 
@@ -2035,9 +2336,6 @@ void sk_common_release(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_common_release);
 
-static DEFINE_RWLOCK(proto_list_lock);
-static LIST_HEAD(proto_list);
-
 #ifdef CONFIG_PROC_FS
 #define PROTO_INUSE_NR 64      /* should be enough for the first time */
 struct prot_inuse {
@@ -2049,8 +2347,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
 #ifdef CONFIG_NET_NS
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
 {
-       int cpu = smp_processor_id();
-       per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
+       __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
 
@@ -2066,13 +2363,13 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot)
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
 
-static int sock_inuse_init_net(struct net *net)
+static int __net_init sock_inuse_init_net(struct net *net)
 {
        net->core.inuse = alloc_percpu(struct prot_inuse);
        return net->core.inuse ? 0 : -ENOMEM;
 }
 
-static void sock_inuse_exit_net(struct net *net)
+static void __net_exit sock_inuse_exit_net(struct net *net)
 {
        free_percpu(net->core.inuse);
 }
@@ -2096,7 +2393,7 @@ static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
 
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
 {
-       __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
+       __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
 
@@ -2154,13 +2451,10 @@ int proto_register(struct proto *prot, int alloc_slab)
                }
 
                if (prot->rsk_prot != NULL) {
-                       static const char mask[] = "request_sock_%s";
-
-                       prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+                       prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
                        if (prot->rsk_prot->slab_name == NULL)
                                goto out_free_sock_slab;
 
-                       sprintf(prot->rsk_prot->slab_name, mask, prot->name);
                        prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
                                                                 prot->rsk_prot->obj_size, 0,
                                                                 SLAB_HWCACHE_ALIGN, NULL);
@@ -2173,14 +2467,11 @@ int proto_register(struct proto *prot, int alloc_slab)
                }
 
                if (prot->twsk_prot != NULL) {
-                       static const char mask[] = "tw_sock_%s";
-
-                       prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+                       prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
 
                        if (prot->twsk_prot->twsk_slab_name == NULL)
                                goto out_free_request_sock_slab;
 
-                       sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name);
                        prot->twsk_prot->twsk_slab =
                                kmem_cache_create(prot->twsk_prot->twsk_slab_name,
                                                  prot->twsk_prot->twsk_obj_size,
@@ -2193,10 +2484,10 @@ int proto_register(struct proto *prot, int alloc_slab)
                }
        }
 
-       write_lock(&proto_list_lock);
+       mutex_lock(&proto_list_mutex);
        list_add(&prot->node, &proto_list);
        assign_proto_idx(prot);
-       write_unlock(&proto_list_lock);
+       mutex_unlock(&proto_list_mutex);
        return 0;
 
 out_free_timewait_sock_slab_name:
@@ -2207,7 +2498,8 @@ out_free_request_sock_slab:
                prot->rsk_prot->slab = NULL;
        }
 out_free_request_sock_slab_name:
-       kfree(prot->rsk_prot->slab_name);
+       if (prot->rsk_prot)
+               kfree(prot->rsk_prot->slab_name);
 out_free_sock_slab:
        kmem_cache_destroy(prot->slab);
        prot->slab = NULL;
@@ -2218,10 +2510,10 @@ EXPORT_SYMBOL(proto_register);
 
 void proto_unregister(struct proto *prot)
 {
-       write_lock(&proto_list_lock);
+       mutex_lock(&proto_list_mutex);
        release_proto_idx(prot);
        list_del(&prot->node);
-       write_unlock(&proto_list_lock);
+       mutex_unlock(&proto_list_mutex);
 
        if (prot->slab != NULL) {
                kmem_cache_destroy(prot->slab);
@@ -2244,9 +2536,9 @@ EXPORT_SYMBOL(proto_unregister);
 
 #ifdef CONFIG_PROC_FS
 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
-       __acquires(proto_list_lock)
+       __acquires(proto_list_mutex)
 {
-       read_lock(&proto_list_lock);
+       mutex_lock(&proto_list_mutex);
        return seq_list_start_head(&proto_list, *pos);
 }
 
@@ -2256,25 +2548,36 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void proto_seq_stop(struct seq_file *seq, void *v)
-       __releases(proto_list_lock)
+       __releases(proto_list_mutex)
 {
-       read_unlock(&proto_list_lock);
+       mutex_unlock(&proto_list_mutex);
 }
 
 static char proto_method_implemented(const void *method)
 {
        return method == NULL ? 'n' : 'y';
 }
+
+static long sock_prot_memory_allocated(struct proto *proto)
+{
+       return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
+}
+
+static char *sock_prot_memory_pressure(struct proto *proto)
+{
+       return proto->memory_pressure != NULL ?
+              proto_memory_pressure(proto) ? "yes" : "no" : "NI";
+}
 
 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
 {
-       seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
+       seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
                        "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
                   proto->name,
                   proto->obj_size,
                   sock_prot_inuse_get(seq_file_net(seq), proto),
-                  proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
-                  proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
+                  sock_prot_memory_allocated(proto),
+                  sock_prot_memory_pressure(proto),
                   proto->max_header,
                   proto->slab == NULL ? "no" : "yes",
                   module_name(proto->owner),