nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - net/core/sock.c
net: backlog functions rename
[linux-2.6.git] / net / core / sock.c
index 174c64bc7a431b9f5905eab4ee70fe9e21bc4b6c..61a65a2e04556eb19955e575c0d30a29d23d6de5 100644 (file)
@@ -7,8 +7,6 @@
  *             handler for protocols to use and generic option handler.
  *
  *
- * Version:    $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
- *
  * Authors:    Ross Biro
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *             Florian La Roche, <flla@stud.uni-sb.de>
 #include <net/net_namespace.h>
 #include <net/request_sock.h>
 #include <net/sock.h>
+#include <linux/net_tstamp.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
 
 static struct lock_class_key af_family_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * Make lock validator output more readable. (we pre-construct these
  * strings build-time, so that runtime initialization of socket
  * locks is fast):
  */
-static const char *af_family_key_strings[AF_MAX+1] = {
+static const char *const af_family_key_strings[AF_MAX+1] = {
   "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
   "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
   "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
@@ -152,13 +150,15 @@ static const char *af_family_key_strings[AF_MAX+1] = {
   "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
   "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
   "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
-  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
+  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
-  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
+  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
+  "sk_lock-AF_IEEE802154",
+  "sk_lock-AF_MAX"
 };
-static const char *af_family_slock_key_strings[AF_MAX+1] = {
+static const char *const af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
@@ -166,13 +166,15 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
   "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
-  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
+  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
   "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
-  "slock-AF_RXRPC" , "slock-AF_MAX"
+  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
+  "slock-AF_IEEE802154",
+  "slock-AF_MAX"
 };
-static const char *af_family_clock_key_strings[AF_MAX+1] = {
+static const char *const af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
   "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
   "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
@@ -180,13 +182,14 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
   "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
   "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
-  "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
+  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
   "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
-  "clock-27"       , "clock-28"          , "clock-29"          ,
+  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
-  "clock-AF_RXRPC" , "clock-AF_MAX"
+  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
+  "clock-AF_IEEE802154",
+  "clock-AF_MAX"
 };
-#endif
 
 /*
  * sk_callback_lock locking rules are per-address-family,
@@ -212,6 +215,7 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 
 /* Maximal space eaten by iovec or ancillary data plus some space */
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
+EXPORT_SYMBOL(sysctl_optmem_max);
 
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
@@ -228,11 +232,12 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
                static int warned __read_mostly;
 
                *timeo_p = 0;
-               if (warned < 10 && net_ratelimit())
+               if (warned < 10 && net_ratelimit()) {
                        warned++;
                        printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
                               "tries to set negative timeout\n",
                                current->comm, task_pid_nr(current));
+               }
                return 0;
        }
        *timeo_p = MAX_SCHEDULE_TIMEOUT;
@@ -255,36 +260,41 @@ static void sock_warn_obsolete_bsdism(const char *name)
        }
 }
 
-static void sock_disable_timestamp(struct sock *sk)
+static void sock_disable_timestamp(struct sock *sk, int flag)
 {
-       if (sock_flag(sk, SOCK_TIMESTAMP)) {
-               sock_reset_flag(sk, SOCK_TIMESTAMP);
-               net_disable_timestamp();
+       if (sock_flag(sk, flag)) {
+               sock_reset_flag(sk, flag);
+               if (!sock_flag(sk, SOCK_TIMESTAMP) &&
+                   !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
+                       net_disable_timestamp();
+               }
        }
 }
 
 
 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
-       int err = 0;
+       int err;
        int skb_len;
+       unsigned long flags;
+       struct sk_buff_head *list = &sk->sk_receive_queue;
 
-       /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
+       /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
           number of warnings when compiling with -W --ANK
         */
        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf) {
-               err = -ENOMEM;
-               goto out;
+               atomic_inc(&sk->sk_drops);
+               return -ENOMEM;
        }
 
        err = sk_filter(sk, skb);
        if (err)
-               goto out;
+               return err;
 
        if (!sk_rmem_schedule(sk, skb->truesize)) {
-               err = -ENOBUFS;
-               goto out;
+               atomic_inc(&sk->sk_drops);
+               return -ENOBUFS;
        }
 
        skb->dev = NULL;
@@ -297,12 +307,14 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
         */
        skb_len = skb->len;
 
-       skb_queue_tail(&sk->sk_receive_queue, skb);
+       spin_lock_irqsave(&list->lock, flags);
+       skb->dropcount = atomic_read(&sk->sk_drops);
+       __skb_queue_tail(list, skb);
+       spin_unlock_irqrestore(&list->lock, flags);
 
        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_data_ready(sk, skb_len);
-out:
-       return err;
+       return 0;
 }
 EXPORT_SYMBOL(sock_queue_rcv_skb);
 
@@ -325,11 +337,15 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
                 */
                mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
 
-               rc = sk->sk_backlog_rcv(sk, skb);
+               rc = sk_backlog_rcv(sk, skb);
 
                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-       } else
-               sk_add_backlog(sk, skb);
+       } else if (sk_add_backlog(sk, skb)) {
+               bh_unlock_sock(sk);
+               atomic_inc(&sk->sk_drops);
+               goto discard_and_relse;
+       }
+
        bh_unlock_sock(sk);
 out:
        sock_put(sk);
@@ -340,11 +356,18 @@ discard_and_relse:
 }
 EXPORT_SYMBOL(sk_receive_skb);
 
+void sk_reset_txq(struct sock *sk)
+{
+       sk_tx_queue_clear(sk);
+}
+EXPORT_SYMBOL(sk_reset_txq);
+
 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 {
        struct dst_entry *dst = sk->sk_dst_cache;
 
        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+               sk_tx_queue_clear(sk);
                sk->sk_dst_cache = NULL;
                dst_release(dst);
                return NULL;
@@ -398,17 +421,18 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
        if (copy_from_user(devname, optval, optlen))
                goto out;
 
-       if (devname[0] == '\0') {
-               index = 0;
-       } else {
-               struct net_device *dev = dev_get_by_name(net, devname);
+       index = 0;
+       if (devname[0] != '\0') {
+               struct net_device *dev;
 
+               rcu_read_lock();
+               dev = dev_get_by_name_rcu(net, devname);
+               if (dev)
+                       index = dev->ifindex;
+               rcu_read_unlock();
                ret = -ENODEV;
                if (!dev)
                        goto out;
-
-               index = dev->ifindex;
-               dev_put(dev);
        }
 
        lock_sock(sk);
@@ -438,9 +462,9 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
  */
 
 int sock_setsockopt(struct socket *sock, int level, int optname,
-                   char __user *optval, int optlen)
+                   char __user *optval, unsigned int optlen)
 {
-       struct sock *sk=sock->sk;
+       struct sock *sk = sock->sk;
        int val;
        int valbool;
        struct linger ling;
@@ -450,15 +474,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
         *      Options without arguments
         */
 
-#ifdef SO_DONTLINGER           /* Compatibility item... */
-       if (optname == SO_DONTLINGER) {
-               lock_sock(sk);
-               sock_reset_flag(sk, SOCK_LINGER);
-               release_sock(sk);
-               return 0;
-       }
-#endif
-
        if (optname == SO_BINDTODEVICE)
                return sock_bindtodevice(sk, optval, optlen);
 
@@ -468,21 +483,23 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
        if (get_user(val, (int __user *)optval))
                return -EFAULT;
 
-       valbool = val?1:0;
+       valbool = val ? 1 : 0;
 
        lock_sock(sk);
 
-       switch(optname) {
+       switch (optname) {
        case SO_DEBUG:
-               if (val && !capable(CAP_NET_ADMIN)) {
+               if (val && !capable(CAP_NET_ADMIN))
                        ret = -EACCES;
-               else
+               else
                        sock_valbool_flag(sk, SOCK_DBG, valbool);
                break;
        case SO_REUSEADDR:
                sk->sk_reuse = valbool;
                break;
        case SO_TYPE:
+       case SO_PROTOCOL:
+       case SO_DOMAIN:
        case SO_ERROR:
                ret = -ENOPROTOOPT;
                break;
@@ -587,7 +604,7 @@ set_rcvbuf:
                        ret = -EINVAL;  /* 1003.1g */
                        break;
                }
-               if (copy_from_user(&ling,optval,sizeof(ling))) {
+               if (copy_from_user(&ling, optval, sizeof(ling))) {
                        ret = -EFAULT;
                        break;
                }
@@ -623,13 +640,38 @@ set_rcvbuf:
                        else
                                sock_set_flag(sk, SOCK_RCVTSTAMPNS);
                        sock_set_flag(sk, SOCK_RCVTSTAMP);
-                       sock_enable_timestamp(sk);
+                       sock_enable_timestamp(sk, SOCK_TIMESTAMP);
                } else {
                        sock_reset_flag(sk, SOCK_RCVTSTAMP);
                        sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                }
                break;
 
+       case SO_TIMESTAMPING:
+               if (val & ~SOF_TIMESTAMPING_MASK) {
+                       ret = -EINVAL;
+                       break;
+               }
+               sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
+                                 val & SOF_TIMESTAMPING_TX_HARDWARE);
+               sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
+                                 val & SOF_TIMESTAMPING_TX_SOFTWARE);
+               sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
+                                 val & SOF_TIMESTAMPING_RX_HARDWARE);
+               if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
+                       sock_enable_timestamp(sk,
+                                             SOCK_TIMESTAMPING_RX_SOFTWARE);
+               else
+                       sock_disable_timestamp(sk,
+                                              SOCK_TIMESTAMPING_RX_SOFTWARE);
+               sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
+                                 val & SOF_TIMESTAMPING_SOFTWARE);
+               sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
+                                 val & SOF_TIMESTAMPING_SYS_HARDWARE);
+               sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
+                                 val & SOF_TIMESTAMPING_RAW_HARDWARE);
+               break;
+
        case SO_RCVLOWAT:
                if (val < 0)
                        val = INT_MAX;
@@ -670,13 +712,18 @@ set_rcvbuf:
        case SO_MARK:
                if (!capable(CAP_NET_ADMIN))
                        ret = -EPERM;
-               else {
+               else
                        sk->sk_mark = val;
-               }
                break;
 
                /* We implement the SO_SNDLOWAT etc to
                   not be settable (1003.1g 5.3) */
+       case SO_RXQ_OVFL:
+               if (valbool)
+                       sock_set_flag(sk, SOCK_RXQ_OVFL);
+               else
+                       sock_reset_flag(sk, SOCK_RXQ_OVFL);
+               break;
        default:
                ret = -ENOPROTOOPT;
                break;
@@ -684,6 +731,7 @@ set_rcvbuf:
        release_sock(sk);
        return ret;
 }
+EXPORT_SYMBOL(sock_setsockopt);
 
 
 int sock_getsockopt(struct socket *sock, int level, int optname,
@@ -697,7 +745,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                struct timeval tm;
        } v;
 
-       unsigned int lv = sizeof(int);
+       int lv = sizeof(int);
        int len;
 
        if (get_user(len, optlen))
@@ -705,7 +753,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
        if (len < 0)
                return -EINVAL;
 
-       switch(optname) {
+       memset(&v, 0, sizeof(v));
+
+       switch (optname) {
        case SO_DEBUG:
                v.val = sock_flag(sk, SOCK_DBG);
                break;
@@ -738,9 +788,17 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                v.val = sk->sk_type;
                break;
 
+       case SO_PROTOCOL:
+               v.val = sk->sk_protocol;
+               break;
+
+       case SO_DOMAIN:
+               v.val = sk->sk_family;
+               break;
+
        case SO_ERROR:
                v.val = -sock_error(sk);
-               if (v.val==0)
+               if (v.val == 0)
                        v.val = xchg(&sk->sk_err_soft, 0);
                break;
 
@@ -775,8 +833,26 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;
 
+       case SO_TIMESTAMPING:
+               v.val = 0;
+               if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
+                       v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
+               if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
+                       v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
+               if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
+                       v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
+               if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
+                       v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
+               if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
+                       v.val |= SOF_TIMESTAMPING_SOFTWARE;
+               if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
+                       v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
+               if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
+                       v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
+               break;
+
        case SO_RCVTIMEO:
-               lv=sizeof(struct timeval);
+               lv = sizeof(struct timeval);
                if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
@@ -787,7 +863,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                break;
 
        case SO_SNDTIMEO:
-               lv=sizeof(struct timeval);
+               lv = sizeof(struct timeval);
                if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
@@ -802,7 +878,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                break;
 
        case SO_SNDLOWAT:
-               v.val=1;
+               v.val = 1;
                break;
 
        case SO_PASSCRED:
@@ -847,6 +923,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                v.val = sk->sk_mark;
                break;
 
+       case SO_RXQ_OVFL:
+               v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
+               break;
+
        default:
                return -ENOPROTOOPT;
        }
@@ -875,13 +955,20 @@ static inline void sock_lock_init(struct sock *sk)
                        af_family_keys + sk->sk_family);
 }
 
+/*
+ * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
+ * even temporarily, because of RCU lookups. sk_node should also be left as is.
+ */
 static void sock_copy(struct sock *nsk, const struct sock *osk)
 {
 #ifdef CONFIG_SECURITY_NETWORK
        void *sptr = nsk->sk_security;
 #endif
-
-       memcpy(nsk, osk, osk->sk_prot->obj_size);
+       BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
+                    sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) +
+                    sizeof(osk->sk_tx_queue_mapping));
+       memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
+              osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
 #ifdef CONFIG_SECURITY_NETWORK
        nsk->sk_security = sptr;
        security_sk_clone(osk, nsk);
@@ -895,17 +982,35 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
        struct kmem_cache *slab;
 
        slab = prot->slab;
-       if (slab != NULL)
-               sk = kmem_cache_alloc(slab, priority);
+       if (slab != NULL) {
+               sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
+               if (!sk)
+                       return sk;
+               if (priority & __GFP_ZERO) {
+                       /*
+                        * caches using SLAB_DESTROY_BY_RCU should let
+                        * sk_node.next un-modified. Special care is taken
+                        * when initializing object to zero.
+                        */
+                       if (offsetof(struct sock, sk_node.next) != 0)
+                               memset(sk, 0, offsetof(struct sock, sk_node.next));
+                       memset(&sk->sk_node.pprev, 0,
+                              prot->obj_size - offsetof(struct sock,
+                                                        sk_node.pprev));
+               }
+       }
        else
                sk = kmalloc(prot->obj_size, priority);
 
        if (sk != NULL) {
+               kmemcheck_annotate_bitfield(sk, flags);
+
                if (security_sk_alloc(sk, family, priority))
                        goto out_free;
 
                if (!try_module_get(prot->owner))
                        goto out_free_sec;
+               sk_tx_queue_clear(sk);
        }
 
        return sk;
@@ -942,7 +1047,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
  *     @family: protocol family
  *     @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
  *     @prot: struct proto associated with this new sock instance
- *     @zero_it: if we should zero the newly allocated sock
  */
 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                      struct proto *prot)
@@ -959,25 +1063,29 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                sk->sk_prot = sk->sk_prot_creator = prot;
                sock_lock_init(sk);
                sock_net_set(sk, get_net(net));
+               atomic_set(&sk->sk_wmem_alloc, 1);
        }
 
        return sk;
 }
+EXPORT_SYMBOL(sk_alloc);
 
-void sk_free(struct sock *sk)
+static void __sk_free(struct sock *sk)
 {
        struct sk_filter *filter;
 
        if (sk->sk_destruct)
                sk->sk_destruct(sk);
 
-       filter = rcu_dereference(sk->sk_filter);
+       filter = rcu_dereference_check(sk->sk_filter,
+                                      atomic_read(&sk->sk_wmem_alloc) == 0);
        if (filter) {
                sk_filter_uncharge(sk, filter);
                rcu_assign_pointer(sk->sk_filter, NULL);
        }
 
-       sock_disable_timestamp(sk);
+       sock_disable_timestamp(sk, SOCK_TIMESTAMP);
+       sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
 
        if (atomic_read(&sk->sk_omem_alloc))
                printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
@@ -987,6 +1095,18 @@ void sk_free(struct sock *sk)
        sk_prot_free(sk->sk_prot_creator, sk);
 }
 
+void sk_free(struct sock *sk)
+{
+       /*
+        * We subtract one from sk_wmem_alloc and can know if
+        * some packets are still in some tx queue.
+        * If not zero, sock_wfree() will call __sk_free(sk) later
+        */
+       if (atomic_dec_and_test(&sk->sk_wmem_alloc))
+               __sk_free(sk);
+}
+EXPORT_SYMBOL(sk_free);
+
 /*
  * Last sock_put should drop reference to sk->sk_net. It has already
  * been dropped in sk_change_net. Taking reference to stopping namespace
@@ -1001,6 +1121,7 @@ void sk_release_kernel(struct sock *sk)
 
        sock_hold(sk);
        sock_release(sk->sk_socket);
+       release_net(sock_net(sk));
        sock_net_set(sk, get_net(&init_net));
        sock_put(sk);
 }
@@ -1022,9 +1143,13 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                sock_lock_init(newsk);
                bh_lock_sock(newsk);
                newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
+               newsk->sk_backlog.len = 0;
 
                atomic_set(&newsk->sk_rmem_alloc, 0);
-               atomic_set(&newsk->sk_wmem_alloc, 0);
+               /*
+                * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
+                */
+               atomic_set(&newsk->sk_wmem_alloc, 1);
                atomic_set(&newsk->sk_omem_alloc, 0);
                skb_queue_head_init(&newsk->sk_receive_queue);
                skb_queue_head_init(&newsk->sk_write_queue);
@@ -1062,6 +1187,11 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 
                newsk->sk_err      = 0;
                newsk->sk_priority = 0;
+               /*
+                * Before updating sk_refcnt, we must commit prior changes to memory
+                * (Documentation/RCU/rculist_nulls.txt for details)
+                */
+               smp_wmb();
                atomic_set(&newsk->sk_refcnt, 2);
 
                /*
@@ -1076,16 +1206,19 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
                 * to be taken into account in all callers. -acme
                 */
                sk_refcnt_debug_inc(newsk);
-               newsk->sk_socket = NULL;
+               sk_set_socket(newsk, NULL);
                newsk->sk_sleep  = NULL;
 
                if (newsk->sk_prot->sockets_allocated)
-                       atomic_inc(newsk->sk_prot->sockets_allocated);
+                       percpu_counter_inc(newsk->sk_prot->sockets_allocated);
+
+               if (sock_flag(newsk, SOCK_TIMESTAMP) ||
+                   sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE))
+                       net_enable_timestamp();
        }
 out:
        return newsk;
 }
-
 EXPORT_SYMBOL_GPL(sk_clone);
 
 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
@@ -1107,12 +1240,12 @@ EXPORT_SYMBOL_GPL(sk_setup_caps);
 
 void __init sk_init(void)
 {
-       if (num_physpages <= 4096) {
+       if (totalram_pages <= 4096) {
                sysctl_wmem_max = 32767;
                sysctl_rmem_max = 32767;
                sysctl_wmem_default = 32767;
                sysctl_rmem_default = 32767;
-       } else if (num_physpages >= 131072) {
+       } else if (totalram_pages >= 131072) {
                sysctl_wmem_max = 131071;
                sysctl_rmem_max = 131071;
        }
@@ -1129,13 +1262,25 @@ void __init sk_init(void)
 void sock_wfree(struct sk_buff *skb)
 {
        struct sock *sk = skb->sk;
+       unsigned int len = skb->truesize;
 
-       /* In case it might be waiting for more memory. */
-       atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
-       if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
+       if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+               /*
+                * Keep a reference on sk_wmem_alloc, this will be released
+                * after sk_write_space() call
+                */
+               atomic_sub(len - 1, &sk->sk_wmem_alloc);
                sk->sk_write_space(sk);
-       sock_put(sk);
+               len = 1;
+       }
+       /*
+        * if sk_wmem_alloc reaches 0, we must finish what sk_free()
+        * could not do because of in-flight packets
+        */
+       if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
+               __sk_free(sk);
 }
+EXPORT_SYMBOL(sock_wfree);
 
 /*
  * Read buffer destructor automatically called from kfree_skb.
@@ -1144,10 +1289,10 @@ void sock_rfree(struct sk_buff *skb)
 {
        struct sock *sk = skb->sk;
 
-       skb_truesize_check(skb);
        atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
        sk_mem_uncharge(skb->sk, skb->truesize);
 }
+EXPORT_SYMBOL(sock_rfree);
 
 
 int sock_i_uid(struct sock *sk)
@@ -1159,6 +1304,7 @@ int sock_i_uid(struct sock *sk)
        read_unlock(&sk->sk_callback_lock);
        return uid;
 }
+EXPORT_SYMBOL(sock_i_uid);
 
 unsigned long sock_i_ino(struct sock *sk)
 {
@@ -1169,6 +1315,7 @@ unsigned long sock_i_ino(struct sock *sk)
        read_unlock(&sk->sk_callback_lock);
        return ino;
 }
+EXPORT_SYMBOL(sock_i_ino);
 
 /*
  * Allocate a skb from the socket's send buffer.
@@ -1177,7 +1324,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
 {
        if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
-               struct sk_buff * skb = alloc_skb(size, priority);
+               struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_w(skb, sk);
                        return skb;
@@ -1185,6 +1332,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
        }
        return NULL;
 }
+EXPORT_SYMBOL(sock_wmalloc);
 
 /*
  * Allocate a skb from the socket's receive buffer.
@@ -1221,6 +1369,7 @@ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
        }
        return NULL;
 }
+EXPORT_SYMBOL(sock_kmalloc);
 
 /*
  * Free an option memory block.
@@ -1230,11 +1379,12 @@ void sock_kfree_s(struct sock *sk, void *mem, int size)
        kfree(mem);
        atomic_sub(size, &sk->sk_omem_alloc);
 }
+EXPORT_SYMBOL(sock_kfree_s);
 
 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
    I think, these locks should be removed for datagram sockets.
  */
-static long sock_wait_for_wmem(struct sock * sk, long timeo)
+static long sock_wait_for_wmem(struct sock *sk, long timeo)
 {
        DEFINE_WAIT(wait);
 
@@ -1263,10 +1413,9 @@ static long sock_wait_for_wmem(struct sock * sk, long timeo)
  *     Generic send/receive buffer handlers
  */
 
-static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
-                                           unsigned long header_len,
-                                           unsigned long data_len,
-                                           int noblock, int *errcode)
+struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
+                                    unsigned long data_len, int noblock,
+                                    int *errcode)
 {
        struct sk_buff *skb;
        gfp_t gfp_mask;
@@ -1346,12 +1495,14 @@ failure:
        *errcode = err;
        return NULL;
 }
+EXPORT_SYMBOL(sock_alloc_send_pskb);
 
 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                    int noblock, int *errcode)
 {
        return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
 }
+EXPORT_SYMBOL(sock_alloc_send_skb);
 
 static void __lock_sock(struct sock *sk)
 {
@@ -1381,7 +1532,7 @@ static void __release_sock(struct sock *sk)
                        struct sk_buff *next = skb->next;
 
                        skb->next = NULL;
-                       sk->sk_backlog_rcv(sk, skb);
+                       sk_backlog_rcv(sk, skb);
 
                        /*
                         * We are in process context here with softirqs
@@ -1396,6 +1547,12 @@ static void __release_sock(struct sock *sk)
 
                bh_lock_sock(sk);
        } while ((skb = sk->sk_backlog.head) != NULL);
+
+       /*
+        * Doing the zeroing here guarantees we cannot loop forever
+        * while a wild producer attempts to flood us.
+        */
+       sk->sk_backlog.len = 0;
 }
 
 /**
@@ -1420,7 +1577,6 @@ int sk_wait_data(struct sock *sk, long *timeo)
        finish_wait(sk->sk_sleep, &wait);
        return rc;
 }
-
 EXPORT_SYMBOL(sk_wait_data);
 
 /**
@@ -1452,7 +1608,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
        /* Under pressure. */
        if (allocated > prot->sysctl_mem[1])
                if (prot->enter_memory_pressure)
-                       prot->enter_memory_pressure();
+                       prot->enter_memory_pressure(sk);
 
        /* Over hard limit. */
        if (allocated > prot->sysctl_mem[2])
@@ -1472,8 +1628,12 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
        }
 
        if (prot->memory_pressure) {
-               if (!*prot->memory_pressure ||
-                   prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
+               int alloc;
+
+               if (!*prot->memory_pressure)
+                       return 1;
+               alloc = percpu_counter_read_positive(prot->sockets_allocated);
+               if (prot->sysctl_mem[2] > alloc *
                    sk_mem_pages(sk->sk_wmem_queued +
                                 atomic_read(&sk->sk_rmem_alloc) +
                                 sk->sk_forward_alloc))
@@ -1497,7 +1657,6 @@ suppress_allocation:
        atomic_sub(amt, prot->memory_allocated);
        return 0;
 }
-
 EXPORT_SYMBOL(__sk_mem_schedule);
 
 /**
@@ -1516,7 +1675,6 @@ void __sk_mem_reclaim(struct sock *sk)
            (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
                *prot->memory_pressure = 0;
 }
-
 EXPORT_SYMBOL(__sk_mem_reclaim);
 
 
@@ -1531,78 +1689,92 @@ int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_bind);
 
 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
                    int len, int flags)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_connect);
 
 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_socketpair);
 
 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_accept);
 
 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
                    int *len, int peer)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_getname);
 
-unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
+unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
 {
        return 0;
 }
+EXPORT_SYMBOL(sock_no_poll);
 
 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_ioctl);
 
 int sock_no_listen(struct socket *sock, int backlog)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_listen);
 
 int sock_no_shutdown(struct socket *sock, int how)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_shutdown);
 
 int sock_no_setsockopt(struct socket *sock, int level, int optname,
-                   char __user *optval, int optlen)
+                   char __user *optval, unsigned int optlen)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_setsockopt);
 
 int sock_no_getsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int __user *optlen)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_getsockopt);
 
 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
                    size_t len)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_sendmsg);
 
 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
                    size_t len, int flags)
 {
        return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL(sock_no_recvmsg);
 
 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
 {
        /* Mirror missing mmap method error code */
        return -ENODEV;
 }
+EXPORT_SYMBOL(sock_no_mmap);
 
 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
 {
@@ -1616,6 +1788,7 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz
        kunmap(page);
        return res;
 }
+EXPORT_SYMBOL(sock_no_sendpage);
 
 /*
  *     Default Socket Callbacks
@@ -1624,7 +1797,7 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz
 static void sock_def_wakeup(struct sock *sk)
 {
        read_lock(&sk->sk_callback_lock);
-       if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+       if (sk_has_sleeper(sk))
                wake_up_interruptible_all(sk->sk_sleep);
        read_unlock(&sk->sk_callback_lock);
 }
@@ -1632,8 +1805,8 @@ static void sock_def_wakeup(struct sock *sk)
 static void sock_def_error_report(struct sock *sk)
 {
        read_lock(&sk->sk_callback_lock);
-       if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-               wake_up_interruptible(sk->sk_sleep);
+       if (sk_has_sleeper(sk))
+               wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
        sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
        read_unlock(&sk->sk_callback_lock);
 }
@@ -1641,8 +1814,9 @@ static void sock_def_error_report(struct sock *sk)
 static void sock_def_readable(struct sock *sk, int len)
 {
        read_lock(&sk->sk_callback_lock);
-       if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-               wake_up_interruptible(sk->sk_sleep);
+       if (sk_has_sleeper(sk))
+               wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+                                               POLLRDNORM | POLLRDBAND);
        sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
        read_unlock(&sk->sk_callback_lock);
 }
@@ -1655,8 +1829,9 @@ static void sock_def_write_space(struct sock *sk)
         * progress.  --DaveM
         */
        if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
-               if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-                       wake_up_interruptible(sk->sk_sleep);
+               if (sk_has_sleeper(sk))
+                       wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+                                               POLLWRNORM | POLLWRBAND);
 
                /* Should agree with poll, otherwise some programs break */
                if (sock_writeable(sk))
@@ -1677,6 +1852,7 @@ void sk_send_sigurg(struct sock *sk)
                if (send_sigurg(&sk->sk_socket->file->f_owner))
                        sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
 }
+EXPORT_SYMBOL(sk_send_sigurg);
 
 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
                    unsigned long expires)
@@ -1684,7 +1860,6 @@ void sk_reset_timer(struct sock *sk, struct timer_list* timer,
        if (!mod_timer(timer, expires))
                sock_hold(sk);
 }
-
 EXPORT_SYMBOL(sk_reset_timer);
 
 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
@@ -1692,7 +1867,6 @@ void sk_stop_timer(struct sock *sk, struct timer_list* timer)
        if (timer_pending(timer) && del_timer(timer))
                __sock_put(sk);
 }
-
 EXPORT_SYMBOL(sk_stop_timer);
 
 void sock_init_data(struct socket *sock, struct sock *sk)
@@ -1711,8 +1885,9 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        sk->sk_allocation       =       GFP_KERNEL;
        sk->sk_rcvbuf           =       sysctl_rmem_default;
        sk->sk_sndbuf           =       sysctl_wmem_default;
+       sk->sk_backlog.limit    =       sk->sk_rcvbuf << 1;
        sk->sk_state            =       TCP_CLOSE;
-       sk->sk_socket           =       sock;
+       sk_set_socket(sk, sock);
 
        sock_set_flag(sk, SOCK_ZAPPED);
 
@@ -1746,11 +1921,17 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
        sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
 
-       sk->sk_stamp = ktime_set(-1L, -1L);
+       sk->sk_stamp = ktime_set(-1L, 0);
 
+       /*
+        * Before updating sk_refcnt, we must commit prior changes to memory
+        * (see Documentation/RCU/rculist_nulls.txt for details)
+        */
+       smp_wmb();
        atomic_set(&sk->sk_refcnt, 1);
        atomic_set(&sk->sk_drops, 0);
 }
+EXPORT_SYMBOL(sock_init_data);
 
 void lock_sock_nested(struct sock *sk, int subclass)
 {
@@ -1766,7 +1947,6 @@ void lock_sock_nested(struct sock *sk, int subclass)
        mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
        local_bh_enable();
 }
-
 EXPORT_SYMBOL(lock_sock_nested);
 
 void release_sock(struct sock *sk)
@@ -1790,7 +1970,7 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
        struct timeval tv;
        if (!sock_flag(sk, SOCK_TIMESTAMP))
-               sock_enable_timestamp(sk);
+               sock_enable_timestamp(sk, SOCK_TIMESTAMP);
        tv = ktime_to_timeval(sk->sk_stamp);
        if (tv.tv_sec == -1)
                return -ENOENT;
@@ -1806,7 +1986,7 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
 {
        struct timespec ts;
        if (!sock_flag(sk, SOCK_TIMESTAMP))
-               sock_enable_timestamp(sk);
+               sock_enable_timestamp(sk, SOCK_TIMESTAMP);
        ts = ktime_to_timespec(sk->sk_stamp);
        if (ts.tv_sec == -1)
                return -ENOENT;
@@ -1818,11 +1998,20 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
 }
 EXPORT_SYMBOL(sock_get_timestampns);
 
-void sock_enable_timestamp(struct sock *sk)
+void sock_enable_timestamp(struct sock *sk, int flag)
 {
-       if (!sock_flag(sk, SOCK_TIMESTAMP)) {
-               sock_set_flag(sk, SOCK_TIMESTAMP);
-               net_enable_timestamp();
+       if (!sock_flag(sk, flag)) {
+               sock_set_flag(sk, flag);
+               /*
+                * We just set one of the two flags that require net time
+                * stamping; only enable it if the other flag has not
+                * already turned it on.
+                */
+               if (!sock_flag(sk,
+                               flag == SOCK_TIMESTAMP ?
+                               SOCK_TIMESTAMPING_RX_SOFTWARE :
+                               SOCK_TIMESTAMP))
+                       net_enable_timestamp();
        }
 }
 
@@ -1840,7 +2029,6 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname,
 
        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
 }
-
 EXPORT_SYMBOL(sock_common_getsockopt);
 
 #ifdef CONFIG_COMPAT
@@ -1870,25 +2058,23 @@ int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
                msg->msg_namelen = addr_len;
        return err;
 }
-
 EXPORT_SYMBOL(sock_common_recvmsg);
 
 /*
  *     Set socket options on an inet socket.
  */
 int sock_common_setsockopt(struct socket *sock, int level, int optname,
-                          char __user *optval, int optlen)
+                          char __user *optval, unsigned int optlen)
 {
        struct sock *sk = sock->sk;
 
        return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
 }
-
 EXPORT_SYMBOL(sock_common_setsockopt);
 
 #ifdef CONFIG_COMPAT
 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
-                                 char __user *optval, int optlen)
+                                 char __user *optval, unsigned int optlen)
 {
        struct sock *sk = sock->sk;
 
@@ -1934,7 +2120,6 @@ void sk_common_release(struct sock *sk)
        sk_refcnt_debug_release(sk);
        sock_put(sk);
 }
-
 EXPORT_SYMBOL(sk_common_release);
 
 static DEFINE_RWLOCK(proto_list_lock);
@@ -1947,15 +2132,62 @@ struct prot_inuse {
 };
 
 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
+
+#ifdef CONFIG_NET_NS
+void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
+{
+       int cpu = smp_processor_id();
+       per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
+
+int sock_prot_inuse_get(struct net *net, struct proto *prot)
+{
+       int cpu, idx = prot->inuse_idx;
+       int res = 0;
+
+       for_each_possible_cpu(cpu)
+               res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
+
+       return res >= 0 ? res : 0;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+
+static int __net_init sock_inuse_init_net(struct net *net)
+{
+       net->core.inuse = alloc_percpu(struct prot_inuse);
+       return net->core.inuse ? 0 : -ENOMEM;
+}
+
+static void __net_exit sock_inuse_exit_net(struct net *net)
+{
+       free_percpu(net->core.inuse);
+}
+
+static struct pernet_operations net_inuse_ops = {
+       .init = sock_inuse_init_net,
+       .exit = sock_inuse_exit_net,
+};
+
+static __init int net_inuse_init(void)
+{
+       if (register_pernet_subsys(&net_inuse_ops))
+               panic("Cannot initialize net inuse counters");
+
+       return 0;
+}
+
+core_initcall(net_inuse_init);
+#else
 static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
 
-void sock_prot_inuse_add(struct proto *prot, int val)
+void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
 {
        __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
 
-int sock_prot_inuse_get(struct proto *prot)
+int sock_prot_inuse_get(struct net *net, struct proto *prot)
 {
        int cpu, idx = prot->inuse_idx;
        int res = 0;
@@ -1966,6 +2198,7 @@ int sock_prot_inuse_get(struct proto *prot)
        return res >= 0 ? res : 0;
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+#endif
 
 static void assign_proto_idx(struct proto *prot)
 {
@@ -1996,33 +2229,23 @@ static inline void release_proto_idx(struct proto *prot)
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
-       char *request_sock_slab_name = NULL;
-       char *timewait_sock_slab_name;
-
-       if (sock_prot_inuse_init(prot) != 0) {
-               printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name);
-               goto out;
-       }
-
        if (alloc_slab) {
                prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
-                                              SLAB_HWCACHE_ALIGN, NULL);
+                                       SLAB_HWCACHE_ALIGN | prot->slab_flags,
+                                       NULL);
 
                if (prot->slab == NULL) {
                        printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
                               prot->name);
-                       goto out_free_inuse;
+                       goto out;
                }
 
                if (prot->rsk_prot != NULL) {
-                       static const char mask[] = "request_sock_%s";
-
-                       request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
-                       if (request_sock_slab_name == NULL)
+                       prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
+                       if (prot->rsk_prot->slab_name == NULL)
                                goto out_free_sock_slab;
 
-                       sprintf(request_sock_slab_name, mask, prot->name);
-                       prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
+                       prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
                                                                 prot->rsk_prot->obj_size, 0,
                                                                 SLAB_HWCACHE_ALIGN, NULL);
 
@@ -2034,18 +2257,17 @@ int proto_register(struct proto *prot, int alloc_slab)
                }
 
                if (prot->twsk_prot != NULL) {
-                       static const char mask[] = "tw_sock_%s";
+                       prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
 
-                       timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
-
-                       if (timewait_sock_slab_name == NULL)
+                       if (prot->twsk_prot->twsk_slab_name == NULL)
                                goto out_free_request_sock_slab;
 
-                       sprintf(timewait_sock_slab_name, mask, prot->name);
                        prot->twsk_prot->twsk_slab =
-                               kmem_cache_create(timewait_sock_slab_name,
+                               kmem_cache_create(prot->twsk_prot->twsk_slab_name,
                                                  prot->twsk_prot->twsk_obj_size,
-                                                 0, SLAB_HWCACHE_ALIGN,
+                                                 0,
+                                                 SLAB_HWCACHE_ALIGN |
+                                                       prot->slab_flags,
                                                  NULL);
                        if (prot->twsk_prot->twsk_slab == NULL)
                                goto out_free_timewait_sock_slab_name;
@@ -2059,23 +2281,20 @@ int proto_register(struct proto *prot, int alloc_slab)
        return 0;
 
 out_free_timewait_sock_slab_name:
-       kfree(timewait_sock_slab_name);
+       kfree(prot->twsk_prot->twsk_slab_name);
 out_free_request_sock_slab:
        if (prot->rsk_prot && prot->rsk_prot->slab) {
                kmem_cache_destroy(prot->rsk_prot->slab);
                prot->rsk_prot->slab = NULL;
        }
 out_free_request_sock_slab_name:
-       kfree(request_sock_slab_name);
+       kfree(prot->rsk_prot->slab_name);
 out_free_sock_slab:
        kmem_cache_destroy(prot->slab);
        prot->slab = NULL;
-out_free_inuse:
-       sock_prot_inuse_free(prot);
 out:
        return -ENOBUFS;
 }
-
 EXPORT_SYMBOL(proto_register);
 
 void proto_unregister(struct proto *prot)
@@ -2085,30 +2304,23 @@ void proto_unregister(struct proto *prot)
        list_del(&prot->node);
        write_unlock(&proto_list_lock);
 
-       sock_prot_inuse_free(prot);
-
        if (prot->slab != NULL) {
                kmem_cache_destroy(prot->slab);
                prot->slab = NULL;
        }
 
        if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
-               const char *name = kmem_cache_name(prot->rsk_prot->slab);
-
                kmem_cache_destroy(prot->rsk_prot->slab);
-               kfree(name);
+               kfree(prot->rsk_prot->slab_name);
                prot->rsk_prot->slab = NULL;
        }
 
        if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
-               const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
-
                kmem_cache_destroy(prot->twsk_prot->twsk_slab);
-               kfree(name);
+               kfree(prot->twsk_prot->twsk_slab_name);
                prot->twsk_prot->twsk_slab = NULL;
        }
 }
-
 EXPORT_SYMBOL(proto_unregister);
 
 #ifdef CONFIG_PROC_FS
@@ -2141,7 +2353,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
                        "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
                   proto->name,
                   proto->obj_size,
-                  proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
+                  sock_prot_inuse_get(seq_file_net(seq), proto),
                   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
                   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
                   proto->max_header,
@@ -2195,7 +2407,8 @@ static const struct seq_operations proto_seq_ops = {
 
 static int proto_seq_open(struct inode *inode, struct file *file)
 {
-       return seq_open(file, &proto_seq_ops);
+       return seq_open_net(inode, file, &proto_seq_ops,
+                           sizeof(struct seq_net_private));
 }
 
 static const struct file_operations proto_seq_fops = {
@@ -2203,45 +2416,33 @@ static const struct file_operations proto_seq_fops = {
        .open           = proto_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
-       .release        = seq_release,
+       .release        = seq_release_net,
+};
+
+static __net_init int proto_init_net(struct net *net)
+{
+       if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
+               return -ENOMEM;
+
+       return 0;
+}
+
+static __net_exit void proto_exit_net(struct net *net)
+{
+       proc_net_remove(net, "protocols");
+}
+
+
+static __net_initdata struct pernet_operations proto_net_ops = {
+       .init = proto_init_net,
+       .exit = proto_exit_net,
 };
 
 static int __init proto_init(void)
 {
-       /* register /proc/net/protocols */
-       return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
+       return register_pernet_subsys(&proto_net_ops);
 }
 
 subsys_initcall(proto_init);
 
 #endif /* PROC_FS */
-
-EXPORT_SYMBOL(sk_alloc);
-EXPORT_SYMBOL(sk_free);
-EXPORT_SYMBOL(sk_send_sigurg);
-EXPORT_SYMBOL(sock_alloc_send_skb);
-EXPORT_SYMBOL(sock_init_data);
-EXPORT_SYMBOL(sock_kfree_s);
-EXPORT_SYMBOL(sock_kmalloc);
-EXPORT_SYMBOL(sock_no_accept);
-EXPORT_SYMBOL(sock_no_bind);
-EXPORT_SYMBOL(sock_no_connect);
-EXPORT_SYMBOL(sock_no_getname);
-EXPORT_SYMBOL(sock_no_getsockopt);
-EXPORT_SYMBOL(sock_no_ioctl);
-EXPORT_SYMBOL(sock_no_listen);
-EXPORT_SYMBOL(sock_no_mmap);
-EXPORT_SYMBOL(sock_no_poll);
-EXPORT_SYMBOL(sock_no_recvmsg);
-EXPORT_SYMBOL(sock_no_sendmsg);
-EXPORT_SYMBOL(sock_no_sendpage);
-EXPORT_SYMBOL(sock_no_setsockopt);
-EXPORT_SYMBOL(sock_no_shutdown);
-EXPORT_SYMBOL(sock_no_socketpair);
-EXPORT_SYMBOL(sock_rfree);
-EXPORT_SYMBOL(sock_setsockopt);
-EXPORT_SYMBOL(sock_wfree);
-EXPORT_SYMBOL(sock_wmalloc);
-EXPORT_SYMBOL(sock_i_uid);
-EXPORT_SYMBOL(sock_i_ino);
-EXPORT_SYMBOL(sysctl_optmem_max);