Merge branch 'core-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
index d0ccdaf..37890f2 100644 (file)
@@ -18,6 +18,9 @@
  *
  */
 
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <linux/swap.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/mutex.h>
 
 #include <net/net_namespace.h>
+#include <linux/nsproxy.h>
 #include <net/ip.h>
 #ifdef CONFIG_IP_VS_IPV6
 #include <net/ipv6.h>
@@ -53,38 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex);
 /* lock for service table */
 static DEFINE_RWLOCK(__ip_vs_svc_lock);
 
-/* lock for table with the real services */
-static DEFINE_RWLOCK(__ip_vs_rs_lock);
-
-/* lock for state and timeout tables */
-static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
-
-/* lock for drop entry handling */
-static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
-
-/* lock for drop packet handling */
-static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
-
-/* 1/rate drop and drop-entry variables */
-int ip_vs_drop_rate = 0;
-int ip_vs_drop_counter = 0;
-static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
-
-/* number of virtual services */
-static int ip_vs_num_services = 0;
-
 /* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-
 
 #ifdef CONFIG_IP_VS_DEBUG
 static int sysctl_ip_vs_debug_level = 0;
@@ -95,31 +69,35 @@ int ip_vs_get_debug_level(void)
 }
 #endif
 
+
+/*  Protos */
+static void __ip_vs_del_service(struct ip_vs_service *svc);
+
+
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+static int __ip_vs_addr_is_local_v6(struct net *net,
+                                   const struct in6_addr *addr)
 {
        struct rt6_info *rt;
-       struct flowi fl = {
-               .oif = 0,
-               .nl_u = {
-                       .ip6_u = {
-                               .daddr = *addr,
-                               .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+       struct flowi6 fl6 = {
+               .daddr = *addr,
        };
 
-       rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+       rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
        if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
-                       return 1;
+               return 1;
 
        return 0;
 }
 #endif
+
+#ifdef CONFIG_SYSCTL
 /*
  *     update_defense_level is called from keventd and from sysctl,
  *     so it needs to protect itself from softirqs
  */
-static void update_defense_level(void)
+static void update_defense_level(struct netns_ipvs *ipvs)
 {
        struct sysinfo i;
        static int old_secure_tcp = 0;
@@ -135,73 +113,73 @@ static void update_defense_level(void)
        /* si_swapinfo(&i); */
        /* availmem = availmem - (i.totalswap - i.freeswap); */
 
-       nomem = (availmem < sysctl_ip_vs_amemthresh);
+       nomem = (availmem < ipvs->sysctl_amemthresh);
 
        local_bh_disable();
 
        /* drop_entry */
-       spin_lock(&__ip_vs_dropentry_lock);
-       switch (sysctl_ip_vs_drop_entry) {
+       spin_lock(&ipvs->dropentry_lock);
+       switch (ipvs->sysctl_drop_entry) {
        case 0:
-               atomic_set(&ip_vs_dropentry, 0);
+               atomic_set(&ipvs->dropentry, 0);
                break;
        case 1:
                if (nomem) {
-                       atomic_set(&ip_vs_dropentry, 1);
-                       sysctl_ip_vs_drop_entry = 2;
+                       atomic_set(&ipvs->dropentry, 1);
+                       ipvs->sysctl_drop_entry = 2;
                } else {
-                       atomic_set(&ip_vs_dropentry, 0);
+                       atomic_set(&ipvs->dropentry, 0);
                }
                break;
        case 2:
                if (nomem) {
-                       atomic_set(&ip_vs_dropentry, 1);
+                       atomic_set(&ipvs->dropentry, 1);
                } else {
-                       atomic_set(&ip_vs_dropentry, 0);
-                       sysctl_ip_vs_drop_entry = 1;
+                       atomic_set(&ipvs->dropentry, 0);
+                       ipvs->sysctl_drop_entry = 1;
                };
                break;
        case 3:
-               atomic_set(&ip_vs_dropentry, 1);
+               atomic_set(&ipvs->dropentry, 1);
                break;
        }
-       spin_unlock(&__ip_vs_dropentry_lock);
+       spin_unlock(&ipvs->dropentry_lock);
 
        /* drop_packet */
-       spin_lock(&__ip_vs_droppacket_lock);
-       switch (sysctl_ip_vs_drop_packet) {
+       spin_lock(&ipvs->droppacket_lock);
+       switch (ipvs->sysctl_drop_packet) {
        case 0:
-               ip_vs_drop_rate = 0;
+               ipvs->drop_rate = 0;
                break;
        case 1:
                if (nomem) {
-                       ip_vs_drop_rate = ip_vs_drop_counter
-                               = sysctl_ip_vs_amemthresh /
-                               (sysctl_ip_vs_amemthresh-availmem);
-                       sysctl_ip_vs_drop_packet = 2;
+                       ipvs->drop_rate = ipvs->drop_counter
+                               = ipvs->sysctl_amemthresh /
+                               (ipvs->sysctl_amemthresh-availmem);
+                       ipvs->sysctl_drop_packet = 2;
                } else {
-                       ip_vs_drop_rate = 0;
+                       ipvs->drop_rate = 0;
                }
                break;
        case 2:
                if (nomem) {
-                       ip_vs_drop_rate = ip_vs_drop_counter
-                               = sysctl_ip_vs_amemthresh /
-                               (sysctl_ip_vs_amemthresh-availmem);
+                       ipvs->drop_rate = ipvs->drop_counter
+                               = ipvs->sysctl_amemthresh /
+                               (ipvs->sysctl_amemthresh-availmem);
                } else {
-                       ip_vs_drop_rate = 0;
-                       sysctl_ip_vs_drop_packet = 1;
+                       ipvs->drop_rate = 0;
+                       ipvs->sysctl_drop_packet = 1;
                }
                break;
        case 3:
-               ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+               ipvs->drop_rate = ipvs->sysctl_am_droprate;
                break;
        }
-       spin_unlock(&__ip_vs_droppacket_lock);
+       spin_unlock(&ipvs->droppacket_lock);
 
        /* secure_tcp */
-       write_lock(&__ip_vs_securetcp_lock);
-       switch (sysctl_ip_vs_secure_tcp) {
+       spin_lock(&ipvs->securetcp_lock);
+       switch (ipvs->sysctl_secure_tcp) {
        case 0:
                if (old_secure_tcp >= 2)
                        to_change = 0;
@@ -210,7 +188,7 @@ static void update_defense_level(void)
                if (nomem) {
                        if (old_secure_tcp < 2)
                                to_change = 1;
-                       sysctl_ip_vs_secure_tcp = 2;
+                       ipvs->sysctl_secure_tcp = 2;
                } else {
                        if (old_secure_tcp >= 2)
                                to_change = 0;
@@ -223,7 +201,7 @@ static void update_defense_level(void)
                } else {
                        if (old_secure_tcp >= 2)
                                to_change = 0;
-                       sysctl_ip_vs_secure_tcp = 1;
+                       ipvs->sysctl_secure_tcp = 1;
                }
                break;
        case 3:
@@ -231,10 +209,11 @@ static void update_defense_level(void)
                        to_change = 1;
                break;
        }
-       old_secure_tcp = sysctl_ip_vs_secure_tcp;
+       old_secure_tcp = ipvs->sysctl_secure_tcp;
        if (to_change >= 0)
-               ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
-       write_unlock(&__ip_vs_securetcp_lock);
+               ip_vs_protocol_timeout_change(ipvs,
+                                             ipvs->sysctl_secure_tcp > 1);
+       spin_unlock(&ipvs->securetcp_lock);
 
        local_bh_enable();
 }
@@ -244,17 +223,18 @@ static void update_defense_level(void)
  *     Timer for checking the defense
  */
 #define DEFENSE_TIMER_PERIOD   1*HZ
-static void defense_work_handler(struct work_struct *work);
-static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
 
 static void defense_work_handler(struct work_struct *work)
 {
-       update_defense_level();
-       if (atomic_read(&ip_vs_dropentry))
-               ip_vs_random_dropentry();
+       struct netns_ipvs *ipvs =
+               container_of(work, struct netns_ipvs, defense_work.work);
 
-       schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+       update_defense_level(ipvs);
+       if (atomic_read(&ipvs->dropentry))
+               ip_vs_random_dropentry(ipvs->net);
+       schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 }
+#endif
 
 int
 ip_vs_use_count_inc(void)
@@ -281,33 +261,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
 /* the service table hashed by fwmark */
 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
 
-/*
- *     Hash table: for real service lookups
- */
-#define IP_VS_RTAB_BITS 4
-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
-
-/*
- *     Trash for destinations
- */
-static LIST_HEAD(ip_vs_dest_trash);
-
-/*
- *     FTP & NULL virtual service counters
- */
-static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
-static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
-
 
 /*
  *     Returns hash value for virtual service
  */
-static __inline__ unsigned
-ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
-                 __be16 port)
+static inline unsigned
+ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
+                 const union nf_inet_addr *addr, __be16 port)
 {
        register unsigned porth = ntohs(port);
        __be32 addr_fold = addr->ip;
@@ -317,6 +277,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
                addr_fold = addr->ip6[0]^addr->ip6[1]^
                            addr->ip6[2]^addr->ip6[3];
 #endif
+       addr_fold ^= ((size_t)net>>8);
 
        return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
                & IP_VS_SVC_TAB_MASK;
@@ -325,13 +286,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
 /*
  *     Returns hash value of fwmark for virtual service lookup
  */
-static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
 {
-       return fwmark & IP_VS_SVC_TAB_MASK;
+       return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
 }
 
 /*
- *     Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ *     Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
  *     or in the ip_vs_svc_fwm_table by fwmark.
  *     Should be called with locked tables.
  */
@@ -340,23 +301,23 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
        unsigned hash;
 
        if (svc->flags & IP_VS_SVC_F_HASHED) {
-               IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
-                         "called from %p\n", __builtin_return_address(0));
+               pr_err("%s(): request for already hashed, called from %pF\n",
+                      __func__, __builtin_return_address(0));
                return 0;
        }
 
        if (svc->fwmark == 0) {
                /*
-                *  Hash it by <protocol,addr,port> in ip_vs_svc_table
+                *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
                 */
-               hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
-                                        svc->port);
+               hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+                                        &svc->addr, svc->port);
                list_add(&svc->s_list, &ip_vs_svc_table[hash]);
        } else {
                /*
-                *  Hash it by fwmark in ip_vs_svc_fwm_table
+                *  Hash it by fwmark in ip_vs_svc_fwm_table
                 */
-               hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
+               hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
                list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
        }
 
@@ -368,22 +329,22 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 
 
 /*
- *     Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ *     Unhashes a service from ip_vs_svc_table / ip_vs_svc_fwm_table.
  *     Should be called with locked tables.
  */
 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 {
        if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
-               IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
-                         "called from %p\n", __builtin_return_address(0));
+               pr_err("%s(): request for unhash flagged, called from %pF\n",
+                      __func__, __builtin_return_address(0));
                return 0;
        }
 
        if (svc->fwmark == 0) {
-               /* Remove it from the ip_vs_svc_table table */
+               /* Remove it from ip_vs_svc_table */
                list_del(&svc->s_list);
        } else {
-               /* Remove it from the ip_vs_svc_fwm_table table */
+               /* Remove it from ip_vs_svc_fwm_table */
                list_del(&svc->f_list);
        }
 
@@ -394,25 +355,25 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 
 
 /*
- *     Get service by {proto,addr,port} in the service table.
+ *     Get service by {netns,proto,addr,port} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
-                   __be16 vport)
+__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+                    const union nf_inet_addr *vaddr, __be16 vport)
 {
        unsigned hash;
        struct ip_vs_service *svc;
 
        /* Check for "full" addressed entries */
-       hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
+       hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
 
        list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
                if ((svc->af == af)
                    && ip_vs_addr_equal(af, &svc->addr, vaddr)
                    && (svc->port == vport)
-                   && (svc->protocol == protocol)) {
+                   && (svc->protocol == protocol)
+                   && net_eq(svc->net, net)) {
                        /* HIT */
-                       atomic_inc(&svc->usecnt);
                        return svc;
                }
        }
@@ -425,18 +386,18 @@ __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
  *     Get service by {fwmark} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_svc_fwm_get(int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
 {
        unsigned hash;
        struct ip_vs_service *svc;
 
        /* Check for fwmark addressed entries */
-       hash = ip_vs_svc_fwm_hashkey(fwmark);
+       hash = ip_vs_svc_fwm_hashkey(net, fwmark);
 
        list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
-               if (svc->fwmark == fwmark && svc->af == af) {
+               if (svc->fwmark == fwmark && svc->af == af
+                   && net_eq(svc->net, net)) {
                        /* HIT */
-                       atomic_inc(&svc->usecnt);
                        return svc;
                }
        }
@@ -445,45 +406,51 @@ __ip_vs_svc_fwm_get(int af, __u32 fwmark)
 }
 
 struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
                  const union nf_inet_addr *vaddr, __be16 vport)
 {
        struct ip_vs_service *svc;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
        read_lock(&__ip_vs_svc_lock);
 
        /*
         *      Check the table hashed by fwmark first
         */
-       if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
-               goto out;
+       if (fwmark) {
+               svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+               if (svc)
+                       goto out;
+       }
 
        /*
         *      Check the table hashed by <protocol,addr,port>
         *      for "full" addressed entries
         */
-       svc = __ip_vs_service_get(af, protocol, vaddr, vport);
+       svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
 
        if (svc == NULL
            && protocol == IPPROTO_TCP
-           && atomic_read(&ip_vs_ftpsvc_counter)
+           && atomic_read(&ipvs->ftpsvc_counter)
            && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
                /*
                 * Check if ftp service entry exists, the packet
                 * might belong to FTP data connections.
                 */
-               svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
+               svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
        }
 
        if (svc == NULL
-           && atomic_read(&ip_vs_nullsvc_counter)) {
+           && atomic_read(&ipvs->nullsvc_counter)) {
                /*
                 * Check if the catch-all port (port zero) exists
                 */
-               svc = __ip_vs_service_get(af, protocol, vaddr, 0);
+               svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
        }
 
   out:
+       if (svc)
+               atomic_inc(&svc->usecnt);
        read_unlock(&__ip_vs_svc_lock);
 
        IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
@@ -502,14 +469,20 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
        dest->svc = svc;
 }
 
-static inline void
+static void
 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
 {
        struct ip_vs_service *svc = dest->svc;
 
        dest->svc = NULL;
-       if (atomic_dec_and_test(&svc->refcnt))
+       if (atomic_dec_and_test(&svc->refcnt)) {
+               IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
+                             svc->fwmark,
+                             IP_VS_DBG_ADDR(svc->af, &svc->addr),
+                             ntohs(svc->port), atomic_read(&svc->usecnt));
+               free_percpu(svc->stats.cpustats);
                kfree(svc);
+       }
 }
 
 
@@ -534,10 +507,10 @@ static inline unsigned ip_vs_rs_hashkey(int af,
 }
 
 /*
- *     Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ *     Hashes ip_vs_dest in rs_table by <proto,addr,port>.
  *     should be called with locked tables.
  */
-static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 {
        unsigned hash;
 
@@ -551,19 +524,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
         */
        hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
 
-       list_add(&dest->d_list, &ip_vs_rtable[hash]);
+       list_add(&dest->d_list, &ipvs->rs_table[hash]);
 
        return 1;
 }
 
 /*
- *     UNhashes ip_vs_dest from ip_vs_rtable.
+ *     UNhashes ip_vs_dest from rs_table.
  *     should be called with locked tables.
  */
 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
 {
        /*
-        * Remove it from the ip_vs_rtable table.
+        * Remove it from the rs_table table.
         */
        if (!list_empty(&dest->d_list)) {
                list_del(&dest->d_list);
@@ -577,10 +550,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
  *     Lookup real service by <proto,addr,port> in the real service table.
  */
 struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
                          const union nf_inet_addr *daddr,
                          __be16 dport)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        unsigned hash;
        struct ip_vs_dest *dest;
 
@@ -590,19 +564,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol,
         */
        hash = ip_vs_rs_hashkey(af, daddr, dport);
 
-       read_lock(&__ip_vs_rs_lock);
-       list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+       read_lock(&ipvs->rs_lock);
+       list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
                if ((dest->af == af)
                    && ip_vs_addr_equal(af, &dest->addr, daddr)
                    && (dest->port == dport)
                    && ((dest->protocol == protocol) ||
                        dest->vfwmark)) {
                        /* HIT */
-                       read_unlock(&__ip_vs_rs_lock);
+                       read_unlock(&ipvs->rs_lock);
                        return dest;
                }
        }
-       read_unlock(&__ip_vs_rs_lock);
+       read_unlock(&ipvs->rs_lock);
 
        return NULL;
 }
@@ -641,15 +615,16 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  * ip_vs_lookup_real_service() looked promissing, but
  * seems not working as expected.
  */
-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
+                                  const union nf_inet_addr *daddr,
                                   __be16 dport,
                                   const union nf_inet_addr *vaddr,
-                                  __be16 vport, __u16 protocol)
+                                  __be16 vport, __u16 protocol, __u32 fwmark)
 {
        struct ip_vs_dest *dest;
        struct ip_vs_service *svc;
 
-       svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+       svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
        if (!svc)
                return NULL;
        dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -674,11 +649,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
                     __be16 dport)
 {
        struct ip_vs_dest *dest, *nxt;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
        /*
         * Find the destination in trash
         */
-       list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+       list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
                IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
                              "dest->refcnt=%d\n",
                              dest->vfwmark,
@@ -709,6 +685,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
                        list_del(&dest->n_list);
                        ip_vs_dst_reset(dest);
                        __ip_vs_unbind_svc(dest);
+                       free_percpu(dest->stats.cpustats);
                        kfree(dest);
                }
        }
@@ -726,25 +703,53 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  *  are expired, and the refcnt of each destination in the trash must
  *  be 1, so we simply release them here.
  */
-static void ip_vs_trash_cleanup(void)
+static void ip_vs_trash_cleanup(struct net *net)
 {
        struct ip_vs_dest *dest, *nxt;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
-       list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+       list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
                list_del(&dest->n_list);
                ip_vs_dst_reset(dest);
                __ip_vs_unbind_svc(dest);
+               free_percpu(dest->stats.cpustats);
                kfree(dest);
        }
 }
 
+static void
+ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+{
+#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
+
+       spin_lock_bh(&src->lock);
+
+       IP_VS_SHOW_STATS_COUNTER(conns);
+       IP_VS_SHOW_STATS_COUNTER(inpkts);
+       IP_VS_SHOW_STATS_COUNTER(outpkts);
+       IP_VS_SHOW_STATS_COUNTER(inbytes);
+       IP_VS_SHOW_STATS_COUNTER(outbytes);
+
+       ip_vs_read_estimator(dst, src);
+
+       spin_unlock_bh(&src->lock);
+}
 
 static void
 ip_vs_zero_stats(struct ip_vs_stats *stats)
 {
        spin_lock_bh(&stats->lock);
 
-       memset(&stats->ustats, 0, sizeof(stats->ustats));
+       /* get current counters as zero point, rates are zeroed */
+
+#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
+
+       IP_VS_ZERO_STATS_COUNTER(conns);
+       IP_VS_ZERO_STATS_COUNTER(inpkts);
+       IP_VS_ZERO_STATS_COUNTER(outpkts);
+       IP_VS_ZERO_STATS_COUNTER(inbytes);
+       IP_VS_ZERO_STATS_COUNTER(outbytes);
+
        ip_vs_zero_estimator(stats);
 
        spin_unlock_bh(&stats->lock);
@@ -754,40 +759,28 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
  *     Update a destination in the given service
  */
 static void
-__ip_vs_update_dest(struct ip_vs_service *svc,
-                   struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
+__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
+                   struct ip_vs_dest_user_kern *udest, int add)
 {
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
        int conn_flags;
 
        /* set the weight and the flags */
        atomic_set(&dest->weight, udest->weight);
-       conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
-
-       /* check if local node and update the flags */
-#ifdef CONFIG_IP_VS_IPV6
-       if (svc->af == AF_INET6) {
-               if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
-                       conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
-                               | IP_VS_CONN_F_LOCALNODE;
-               }
-       } else
-#endif
-               if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
-                       conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
-                               | IP_VS_CONN_F_LOCALNODE;
-               }
+       conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
+       conn_flags |= IP_VS_CONN_F_INACTIVE;
 
        /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
-       if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
+       if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
                conn_flags |= IP_VS_CONN_F_NOOUTPUT;
        } else {
                /*
-                *    Put the real service in ip_vs_rtable if not present.
+                *    Put the real service in rs_table if not present.
                 *    For now only for NAT!
                 */
-               write_lock_bh(&__ip_vs_rs_lock);
-               ip_vs_rs_hash(dest);
-               write_unlock_bh(&__ip_vs_rs_lock);
+               write_lock_bh(&ipvs->rs_lock);
+               ip_vs_rs_hash(ipvs, dest);
+               write_unlock_bh(&ipvs->rs_lock);
        }
        atomic_set(&dest->conn_flags, conn_flags);
 
@@ -809,6 +802,29 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
                dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
        dest->u_threshold = udest->u_threshold;
        dest->l_threshold = udest->l_threshold;
+
+       spin_lock_bh(&dest->dst_lock);
+       ip_vs_dst_reset(dest);
+       spin_unlock_bh(&dest->dst_lock);
+
+       if (add)
+               ip_vs_start_estimator(svc->net, &dest->stats);
+
+       write_lock_bh(&__ip_vs_svc_lock);
+
+       /* Wait until all other svc users go away */
+       IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+
+       if (add) {
+               list_add(&dest->n_list, &svc->destinations);
+               svc->num_dests++;
+       }
+
+       /* call the update_service, because server weight may be changed */
+       if (svc->scheduler->update_service)
+               svc->scheduler->update_service(svc);
+
+       write_unlock_bh(&__ip_vs_svc_lock);
 }
 
 
@@ -829,21 +845,26 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
                atype = ipv6_addr_type(&udest->addr.in6);
                if ((!(atype & IPV6_ADDR_UNICAST) ||
                        atype & IPV6_ADDR_LINKLOCAL) &&
-                       !__ip_vs_addr_is_local_v6(&udest->addr.in6))
+                       !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
                        return -EINVAL;
        } else
 #endif
        {
-               atype = inet_addr_type(&init_net, udest->addr.ip);
+               atype = inet_addr_type(svc->net, udest->addr.ip);
                if (atype != RTN_LOCAL && atype != RTN_UNICAST)
                        return -EINVAL;
        }
 
-       dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
+       dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
        if (dest == NULL) {
-               IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
+               pr_err("%s(): no memory.\n", __func__);
                return -ENOMEM;
        }
+       dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!dest->stats.cpustats) {
+               pr_err("%s() alloc_percpu failed\n", __func__);
+               goto err_alloc;
+       }
 
        dest->af = svc->af;
        dest->protocol = svc->protocol;
@@ -856,18 +877,21 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
        atomic_set(&dest->activeconns, 0);
        atomic_set(&dest->inactconns, 0);
        atomic_set(&dest->persistconns, 0);
-       atomic_set(&dest->refcnt, 0);
+       atomic_set(&dest->refcnt, 1);
 
        INIT_LIST_HEAD(&dest->d_list);
        spin_lock_init(&dest->dst_lock);
        spin_lock_init(&dest->stats.lock);
-       __ip_vs_update_dest(svc, dest, udest);
-       ip_vs_new_estimator(&dest->stats);
+       __ip_vs_update_dest(svc, dest, udest, 1);
 
        *dest_p = dest;
 
        LeaveFunction(2);
        return 0;
+
+err_alloc:
+       kfree(dest);
+       return -ENOMEM;
 }
 
 
@@ -885,13 +909,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
        EnterFunction(2);
 
        if (udest->weight < 0) {
-               IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
+               pr_err("%s(): server weight less than zero\n", __func__);
                return -ERANGE;
        }
 
        if (udest->l_threshold > udest->u_threshold) {
-               IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
-                         "upper threshold\n");
+               pr_err("%s(): lower threshold is higher than upper threshold\n",
+                       __func__);
                return -ERANGE;
        }
 
@@ -903,7 +927,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
        dest = ip_vs_lookup_dest(svc, &daddr, dport);
 
        if (dest != NULL) {
-               IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
+               IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
                return -EEXIST;
        }
 
@@ -922,65 +946,22 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
                              IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
                              ntohs(dest->vport));
 
-               __ip_vs_update_dest(svc, dest, udest);
-
                /*
                 * Get the destination from the trash
                 */
                list_del(&dest->n_list);
 
-               ip_vs_new_estimator(&dest->stats);
-
-               write_lock_bh(&__ip_vs_svc_lock);
-
+               __ip_vs_update_dest(svc, dest, udest, 1);
+               ret = 0;
+       } else {
                /*
-                * Wait until all other svc users go away.
+                * Allocate and initialize the dest structure
                 */
-               IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
-
-               list_add(&dest->n_list, &svc->destinations);
-               svc->num_dests++;
-
-               /* call the update_service function of its scheduler */
-               if (svc->scheduler->update_service)
-                       svc->scheduler->update_service(svc);
-
-               write_unlock_bh(&__ip_vs_svc_lock);
-               return 0;
-       }
-
-       /*
-        * Allocate and initialize the dest structure
-        */
-       ret = ip_vs_new_dest(svc, udest, &dest);
-       if (ret) {
-               return ret;
+               ret = ip_vs_new_dest(svc, udest, &dest);
        }
-
-       /*
-        * Add the dest entry into the list
-        */
-       atomic_inc(&dest->refcnt);
-
-       write_lock_bh(&__ip_vs_svc_lock);
-
-       /*
-        * Wait until all other svc users go away.
-        */
-       IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
-
-       list_add(&dest->n_list, &svc->destinations);
-       svc->num_dests++;
-
-       /* call the update_service function of its scheduler */
-       if (svc->scheduler->update_service)
-               svc->scheduler->update_service(svc);
-
-       write_unlock_bh(&__ip_vs_svc_lock);
-
        LeaveFunction(2);
 
-       return 0;
+       return ret;
 }
 
 
@@ -997,13 +978,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
        EnterFunction(2);
 
        if (udest->weight < 0) {
-               IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
+               pr_err("%s(): server weight less than zero\n", __func__);
                return -ERANGE;
        }
 
        if (udest->l_threshold > udest->u_threshold) {
-               IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
-                         "upper threshold\n");
+               pr_err("%s(): lower threshold is higher than upper threshold\n",
+                       __func__);
                return -ERANGE;
        }
 
@@ -1015,23 +996,11 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
        dest = ip_vs_lookup_dest(svc, &daddr, dport);
 
        if (dest == NULL) {
-               IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
+               IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
                return -ENOENT;
        }
 
-       __ip_vs_update_dest(svc, dest, udest);
-
-       write_lock_bh(&__ip_vs_svc_lock);
-
-       /* Wait until all other svc users go away */
-       IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
-
-       /* call the update_service, because server weight may be changed */
-       if (svc->scheduler->update_service)
-               svc->scheduler->update_service(svc);
-
-       write_unlock_bh(&__ip_vs_svc_lock);
-
+       __ip_vs_update_dest(svc, dest, udest, 0);
        LeaveFunction(2);
 
        return 0;
@@ -1041,16 +1010,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 /*
  *     Delete a destination (must be already unlinked from the service)
  */
-static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
 {
-       ip_vs_kill_estimator(&dest->stats);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ip_vs_stop_estimator(net, &dest->stats);
 
        /*
         *  Remove it from the d-linked list with the real services.
         */
-       write_lock_bh(&__ip_vs_rs_lock);
+       write_lock_bh(&ipvs->rs_lock);
        ip_vs_rs_unhash(dest);
-       write_unlock_bh(&__ip_vs_rs_lock);
+       write_unlock_bh(&ipvs->rs_lock);
 
        /*
         *  Decrease the refcnt of the dest, and free the dest
@@ -1058,6 +1029,10 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
         *  the destination into the trash.
         */
        if (atomic_dec_and_test(&dest->refcnt)) {
+               IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
+                             dest->vfwmark,
+                             IP_VS_DBG_ADDR(dest->af, &dest->addr),
+                             ntohs(dest->port));
                ip_vs_dst_reset(dest);
                /* simply decrease svc->refcnt here, let the caller check
                   and release the service if nobody refers to it.
@@ -1065,6 +1040,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
                   and only one user context can update virtual service at a
                   time, so the operation here is OK */
                atomic_dec(&dest->svc->refcnt);
+               free_percpu(dest->stats.cpustats);
                kfree(dest);
        } else {
                IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
@@ -1072,7 +1048,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
                              ntohs(dest->port),
                              atomic_read(&dest->refcnt));
-               list_add(&dest->n_list, &ip_vs_dest_trash);
+               list_add(&dest->n_list, &ipvs->dest_trash);
                atomic_inc(&dest->refcnt);
        }
 }
@@ -1115,7 +1091,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
        dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
 
        if (dest == NULL) {
-               IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
+               IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
                return -ENOENT;
        }
 
@@ -1124,7 +1100,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
        /*
         *      Wait until all other svc users go away.
         */
-       IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+       IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
 
        /*
         *      Unlink dest from the service
@@ -1136,7 +1112,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
        /*
         *      Delete the destination
         */
-       __ip_vs_del_dest(dest);
+       __ip_vs_del_dest(svc->net, dest);
 
        LeaveFunction(2);
 
@@ -1148,12 +1124,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
  *     Add a service into the service hash table
  */
 static int
-ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
                  struct ip_vs_service **svc_p)
 {
        int ret = 0;
        struct ip_vs_scheduler *sched = NULL;
+       struct ip_vs_pe *pe = NULL;
        struct ip_vs_service *svc = NULL;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
        /* increase the module use count */
        ip_vs_use_count_inc();
@@ -1161,10 +1139,19 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
        /* Lookup the scheduler by 'u->sched_name' */
        sched = ip_vs_scheduler_get(u->sched_name);
        if (sched == NULL) {
-               IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
-                          u->sched_name);
+               pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
                ret = -ENOENT;
-               goto out_mod_dec;
+               goto out_err;
+       }
+
+       if (u->pe_name && *u->pe_name) {
+               pe = ip_vs_pe_getbyname(u->pe_name);
+               if (pe == NULL) {
+                       pr_info("persistence engine module ip_vs_pe_%s "
+                               "not found\n", u->pe_name);
+                       ret = -ENOENT;
+                       goto out_err;
+               }
        }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -1174,15 +1161,20 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
        }
 #endif
 
-       svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
+       svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
        if (svc == NULL) {
-               IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
+               IP_VS_DBG(1, "%s(): no memory\n", __func__);
                ret = -ENOMEM;
                goto out_err;
        }
+       svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!svc->stats.cpustats) {
+               ret = -ENOMEM;  /* as on the kzalloc failure path above */
+               goto out_err;
+       }
 
        /* I'm the first user of the service */
-       atomic_set(&svc->usecnt, 1);
+       atomic_set(&svc->usecnt, 0);
        atomic_set(&svc->refcnt, 0);
 
        svc->af = u->af;
@@ -1193,6 +1185,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
        svc->flags = u->flags;
        svc->timeout = u->timeout * HZ;
        svc->netmask = u->netmask;
+       svc->net = net;
 
        INIT_LIST_HEAD(&svc->destinations);
        rwlock_init(&svc->sched_lock);
@@ -1204,17 +1197,21 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
                goto out_err;
        sched = NULL;
 
+       /* Bind the ct retriever */
+       ip_vs_bind_pe(svc, pe);
+       pe = NULL;
+
        /* Update the virtual service counters */
        if (svc->port == FTPPORT)
-               atomic_inc(&ip_vs_ftpsvc_counter);
+               atomic_inc(&ipvs->ftpsvc_counter);
        else if (svc->port == 0)
-               atomic_inc(&ip_vs_nullsvc_counter);
+               atomic_inc(&ipvs->nullsvc_counter);
 
-       ip_vs_new_estimator(&svc->stats);
+       ip_vs_start_estimator(net, &svc->stats);
 
        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
-               ip_vs_num_services++;
+               ipvs->num_services++;
 
        /* Hash the service into the service table */
        write_lock_bh(&__ip_vs_svc_lock);
@@ -1222,22 +1219,26 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
        write_unlock_bh(&__ip_vs_svc_lock);
 
        *svc_p = svc;
+       /* Now there is a service - full throttle */
+       ipvs->enable = 1;
        return 0;
 
-  out_err:
+
+ out_err:
        if (svc != NULL) {
-               if (svc->scheduler)
-                       ip_vs_unbind_scheduler(svc);
+               ip_vs_unbind_scheduler(svc);
                if (svc->inc) {
                        local_bh_disable();
                        ip_vs_app_inc_put(svc->inc);
                        local_bh_enable();
                }
+               if (svc->stats.cpustats)
+                       free_percpu(svc->stats.cpustats);
                kfree(svc);
        }
        ip_vs_scheduler_put(sched);
+       ip_vs_pe_put(pe);
 
-  out_mod_dec:
        /* decrease the module use count */
        ip_vs_use_count_dec();
 
@@ -1252,6 +1253,7 @@ static int
 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 {
        struct ip_vs_scheduler *sched, *old_sched;
+       struct ip_vs_pe *pe = NULL, *old_pe = NULL;
        int ret = 0;
 
        /*
@@ -1259,12 +1261,22 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
         */
        sched = ip_vs_scheduler_get(u->sched_name);
        if (sched == NULL) {
-               IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
-                          u->sched_name);
+               pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
                return -ENOENT;
        }
        old_sched = sched;
 
+       if (u->pe_name && *u->pe_name) {
+               pe = ip_vs_pe_getbyname(u->pe_name);
+               if (pe == NULL) {
+                       pr_info("persistence engine module ip_vs_pe_%s "
+                               "not found\n", u->pe_name);
+                       ret = -ENOENT;
+                       goto out;
+               }
+               old_pe = pe;
+       }
+
 #ifdef CONFIG_IP_VS_IPV6
        if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
                ret = -EINVAL;
@@ -1277,7 +1289,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
        /*
         * Wait until all other svc users go away.
         */
-       IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+       IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
 
        /*
         * Set the flags and timeout value
@@ -1316,15 +1328,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
                }
        }
 
+       old_pe = svc->pe;
+       if (pe != old_pe) {
+               ip_vs_unbind_pe(svc);
+               ip_vs_bind_pe(svc, pe);
+       }
+
   out_unlock:
        write_unlock_bh(&__ip_vs_svc_lock);
-#ifdef CONFIG_IP_VS_IPV6
   out:
-#endif
-
-       if (old_sched)
-               ip_vs_scheduler_put(old_sched);
-
+       ip_vs_scheduler_put(old_sched);
+       ip_vs_pe_put(old_pe);
        return ret;
 }
 
@@ -1338,18 +1352,26 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 {
        struct ip_vs_dest *dest, *nxt;
        struct ip_vs_scheduler *old_sched;
+       struct ip_vs_pe *old_pe;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
+
+       pr_info("%s: enter\n", __func__);
 
        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
-               ip_vs_num_services--;
+               ipvs->num_services--;
 
-       ip_vs_kill_estimator(&svc->stats);
+       ip_vs_stop_estimator(svc->net, &svc->stats);
 
        /* Unbind scheduler */
        old_sched = svc->scheduler;
        ip_vs_unbind_scheduler(svc);
-       if (old_sched)
-               ip_vs_scheduler_put(old_sched);
+       ip_vs_scheduler_put(old_sched);
+
+       /* Unbind persistence engine */
+       old_pe = svc->pe;
+       ip_vs_unbind_pe(svc);
+       ip_vs_pe_put(old_pe);
 
        /* Unbind app inc */
        if (svc->inc) {
@@ -1362,35 +1384,38 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
         */
        list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
                __ip_vs_unlink_dest(svc, dest, 0);
-               __ip_vs_del_dest(dest);
+               __ip_vs_del_dest(svc->net, dest);
        }
 
        /*
         *    Update the virtual service counters
         */
        if (svc->port == FTPPORT)
-               atomic_dec(&ip_vs_ftpsvc_counter);
+               atomic_dec(&ipvs->ftpsvc_counter);
        else if (svc->port == 0)
-               atomic_dec(&ip_vs_nullsvc_counter);
+               atomic_dec(&ipvs->nullsvc_counter);
 
        /*
         *    Free the service if nobody refers to it
         */
-       if (atomic_read(&svc->refcnt) == 0)
+       if (atomic_read(&svc->refcnt) == 0) {
+               IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
+                             svc->fwmark,
+                             IP_VS_DBG_ADDR(svc->af, &svc->addr),
+                             ntohs(svc->port), atomic_read(&svc->usecnt));
+               free_percpu(svc->stats.cpustats);
                kfree(svc);
+       }
 
        /* decrease the module use count */
        ip_vs_use_count_dec();
 }
 
 /*
- *     Delete a service from the service list
+ * Unlink a service from list and try to delete it if its refcnt reached 0
  */
-static int ip_vs_del_service(struct ip_vs_service *svc)
+static void ip_vs_unlink_service(struct ip_vs_service *svc)
 {
-       if (svc == NULL)
-               return -EEXIST;
-
        /*
         * Unhash it from the service table
         */
@@ -1401,11 +1426,21 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
        /*
         * Wait until all the svc users go away.
         */
-       IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
+       IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
 
        __ip_vs_del_service(svc);
 
        write_unlock_bh(&__ip_vs_svc_lock);
+}
+
+/*
+ *     Delete a service from the service list
+ */
+static int ip_vs_del_service(struct ip_vs_service *svc)
+{
+       if (svc == NULL)
+               return -EEXIST;
+       ip_vs_unlink_service(svc);
 
        return 0;
 }
@@ -1414,24 +1449,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
 /*
  *     Flush all the virtual services
  */
-static int ip_vs_flush(void)
+static int ip_vs_flush(struct net *net)
 {
        int idx;
        struct ip_vs_service *svc, *nxt;
 
        /*
-        * Flush the service table hashed by <protocol,addr,port>
+        * Flush the service table hashed by <netns,protocol,addr,port>
         */
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-               list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
-                       write_lock_bh(&__ip_vs_svc_lock);
-                       ip_vs_svc_unhash(svc);
-                       /*
-                        * Wait until all the svc users go away.
-                        */
-                       IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-                       __ip_vs_del_service(svc);
-                       write_unlock_bh(&__ip_vs_svc_lock);
+               list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
+                                        s_list) {
+                       if (net_eq(svc->net, net))
+                               ip_vs_unlink_service(svc);
                }
        }
 
@@ -1441,20 +1471,92 @@ static int ip_vs_flush(void)
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry_safe(svc, nxt,
                                         &ip_vs_svc_fwm_table[idx], f_list) {
-                       write_lock_bh(&__ip_vs_svc_lock);
-                       ip_vs_svc_unhash(svc);
-                       /*
-                        * Wait until all the svc users go away.
-                        */
-                       IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-                       __ip_vs_del_service(svc);
-                       write_unlock_bh(&__ip_vs_svc_lock);
+                       if (net_eq(svc->net, net))
+                               ip_vs_unlink_service(svc);
                }
        }
 
        return 0;
 }
 
+/*
+ *     Flush every virtual service that belongs to @net, holding
+ *     __ip_vs_mutex around ip_vs_flush().  Called by __ip_vs_cleanup()
+ */
+void __ip_vs_service_cleanup(struct net *net)
+{
+       EnterFunction(2);
+       /* ip_vs_flush() walks both the <proto,addr,port> and fwmark tables */
+       mutex_lock(&__ip_vs_mutex);
+       ip_vs_flush(net);
+       mutex_unlock(&__ip_vs_mutex);
+       LeaveFunction(2);
+}
+/*
+ * Drop the dst held by dest->dst_cache if it belongs to @dev (under dst_lock)
+ */
+static inline void
+__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
+{
+       spin_lock_bh(&dest->dst_lock);
+       if (dest->dst_cache && dest->dst_cache->dev == dev) {
+               IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
+                             dev->name,
+                             IP_VS_DBG_ADDR(dest->af, &dest->addr),
+                             ntohs(dest->port),
+                             atomic_read(&dest->refcnt));
+               ip_vs_dst_reset(dest);
+       }
+       spin_unlock_bh(&dest->dst_lock);
+
+}
+/*
+ * Netdev event receiver
+ * Currently only NETDEV_UNREGISTER is handled, i.e. any reference we hold to
+ * a device that is being unregistered must be released.
+ */
+static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
+                           void *ptr)
+{
+       struct net_device *dev = ptr;
+       struct net *net = dev_net(dev);
+       struct ip_vs_service *svc;
+       struct ip_vs_dest *dest;
+       unsigned int idx;
+
+       if (event != NETDEV_UNREGISTER)
+               return NOTIFY_DONE;
+       IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
+       EnterFunction(2);
+       mutex_lock(&__ip_vs_mutex);
+       for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+               list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+                       if (net_eq(svc->net, net)) {
+                               list_for_each_entry(dest, &svc->destinations,
+                                                   n_list) {
+                                       __ip_vs_dev_reset(dest, dev);
+                               }
+                       }
+               }
+
+               list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+                       if (net_eq(svc->net, net)) {
+                               list_for_each_entry(dest, &svc->destinations,
+                                                   n_list) {
+                                       __ip_vs_dev_reset(dest, dev);
+                               }
+                       }
+
+               }
+       }
+
+       list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) {
+               __ip_vs_dev_reset(dest, dev);
+       }
+       mutex_unlock(&__ip_vs_mutex);
+       LeaveFunction(2);
+       return NOTIFY_DONE;
+}
 
 /*
  *     Zero counters in a service or all services
@@ -1472,51 +1574,53 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
        return 0;
 }
 
-static int ip_vs_zero_all(void)
+static int ip_vs_zero_all(struct net *net)
 {
        int idx;
        struct ip_vs_service *svc;
 
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-                       ip_vs_zero_service(svc);
+                       if (net_eq(svc->net, net))
+                               ip_vs_zero_service(svc);
                }
        }
 
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-                       ip_vs_zero_service(svc);
+                       if (net_eq(svc->net, net))
+                               ip_vs_zero_service(svc);
                }
        }
 
-       ip_vs_zero_stats(&ip_vs_stats);
+       ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
        return 0;
 }
 
-
+#ifdef CONFIG_SYSCTL
 static int
-proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
+proc_do_defense_mode(ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+       struct net *net = current->nsproxy->net_ns;
        int *valp = table->data;
        int val = *valp;
        int rc;
 
-       rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       rc = proc_dointvec(table, write, buffer, lenp, ppos);
        if (write && (*valp != val)) {
                if ((*valp < 0) || (*valp > 3)) {
                        /* Restore the correct value */
                        *valp = val;
                } else {
-                       update_defense_level();
+                       update_defense_level(net_ipvs(net));
                }
        }
        return rc;
 }
 
-
 static int
-proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
+proc_do_sync_threshold(ctl_table *table, int write,
                       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int *valp = table->data;
@@ -1526,7 +1630,7 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
        /* backup the value first */
        memcpy(val, valp, sizeof(val));
 
-       rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       rc = proc_dointvec(table, write, buffer, lenp, ppos);
        if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
                /* Restore the correct value */
                memcpy(valp, val, sizeof(val));
@@ -1534,193 +1638,226 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
        return rc;
 }
 
+static int
+proc_do_sync_mode(ctl_table *table, int write,
+                    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int *valp = table->data;
+       int val = *valp;        /* value before this write */
+       int rc;
+
+       rc = proc_dointvec(table, write, buffer, lenp, ppos);
+       if (write && (*valp != val)) {
+               if ((*valp < 0) || (*valp > 1)) {
+                       /* Only 0 and 1 are accepted: restore the old value */
+                       *valp = val;
+               } else {
+                       struct net *net = current->nsproxy->net_ns;
+                       ip_vs_sync_switch_mode(net, val); /* old value */
+               }
+       }
+       return rc;
+}
 
 /*
  *     IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ *     Do not reorder or insert entries without making the matching
+ *     change to the netns init in __ip_vs_control_init()
  */
 
 static struct ctl_table vs_vars[] = {
        {
                .procname       = "amemthresh",
-               .data           = &sysctl_ip_vs_amemthresh,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
-#ifdef CONFIG_IP_VS_DEBUG
        {
-               .procname       = "debug_level",
-               .data           = &sysctl_ip_vs_debug_level,
+               .procname       = "am_droprate",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "drop_entry",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_do_defense_mode,
+       },
+       {
+               .procname       = "drop_packet",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_do_defense_mode,
+       },
+#ifdef CONFIG_IP_VS_NFCT
+       {
+               .procname       = "conntrack",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
 #endif
        {
-               .procname       = "am_droprate",
-               .data           = &sysctl_ip_vs_am_droprate,
+               .procname       = "secure_tcp",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_do_defense_mode,
+       },
+       {
+               .procname       = "snat_reroute",
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
        {
-               .procname       = "drop_entry",
-               .data           = &sysctl_ip_vs_drop_entry,
+               .procname       = "sync_version",
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_do_defense_mode,
+               .proc_handler   = &proc_do_sync_mode,
        },
        {
-               .procname       = "drop_packet",
-               .data           = &sysctl_ip_vs_drop_packet,
+               .procname       = "cache_bypass",
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_do_defense_mode,
+               .proc_handler   = proc_dointvec,
        },
        {
-               .procname       = "secure_tcp",
-               .data           = &sysctl_ip_vs_secure_tcp,
+               .procname       = "expire_nodest_conn",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "expire_quiescent_template",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "sync_threshold",
+               .maxlen         =
+                       sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
+               .mode           = 0644,
+               .proc_handler   = proc_do_sync_threshold,
+       },
+       {
+               .procname       = "nat_icmp_send",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+#ifdef CONFIG_IP_VS_DEBUG
+       {
+               .procname       = "debug_level",
+               .data           = &sysctl_ip_vs_debug_level,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_do_defense_mode,
+               .proc_handler   = proc_dointvec,
        },
+#endif
 #if 0
        {
                .procname       = "timeout_established",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_synsent",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_synrecv",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_finwait",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_timewait",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_close",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_closewait",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_lastack",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_listen",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_synack",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_udp",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
        {
                .procname       = "timeout_icmp",
                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_jiffies,
+               .proc_handler   = proc_dointvec_jiffies,
        },
 #endif
-       {
-               .procname       = "cache_bypass",
-               .data           = &sysctl_ip_vs_cache_bypass,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
-       },
-       {
-               .procname       = "expire_nodest_conn",
-               .data           = &sysctl_ip_vs_expire_nodest_conn,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
-       },
-       {
-               .procname       = "expire_quiescent_template",
-               .data           = &sysctl_ip_vs_expire_quiescent_template,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
-       },
-       {
-               .procname       = "sync_threshold",
-               .data           = &sysctl_ip_vs_sync_threshold,
-               .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
-               .mode           = 0644,
-               .proc_handler   = &proc_do_sync_threshold,
-       },
-       {
-               .procname       = "nat_icmp_send",
-               .data           = &sysctl_ip_vs_nat_icmp_send,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
-       },
-       { .ctl_name = 0 }
+       { }
 };
 
 const struct ctl_path net_vs_ctl_path[] = {
-       { .procname = "net", .ctl_name = CTL_NET, },
-       { .procname = "ipv4", .ctl_name = NET_IPV4, },
+       { .procname = "net", },
+       { .procname = "ipv4", },
        { .procname = "vs", },
        { }
 };
 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
-
-static struct ctl_table_header * sysctl_header;
+#endif
 
 #ifdef CONFIG_PROC_FS
 
 struct ip_vs_iter {
+       struct seq_net_private p;  /* Do not move this, netns depends upon it*/
        struct list_head *table;
        int bucket;
 };
@@ -1747,6 +1884,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags)
 /* Get the Nth entry in the two lists */
 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 {
+       struct net *net = seq_file_net(seq);
        struct ip_vs_iter *iter = seq->private;
        int idx;
        struct ip_vs_service *svc;
@@ -1754,7 +1892,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
        /* look in hash by protocol */
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-                       if (pos-- == 0){
+                       if (net_eq(svc->net, net) && pos-- == 0) {
                                iter->table = ip_vs_svc_table;
                                iter->bucket = idx;
                                return svc;
@@ -1765,7 +1903,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
        /* keep looking in fwmark */
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-                       if (pos-- == 0) {
+                       if (net_eq(svc->net, net) && pos-- == 0) {
                                iter->table = ip_vs_svc_fwm_table;
                                iter->bucket = idx;
                                return svc;
@@ -1842,7 +1980,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
        if (v == SEQ_START_TOKEN) {
                seq_printf(seq,
                        "IP Virtual Server version %d.%d.%d (size=%d)\n",
-                       NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+                       NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
                seq_puts(seq,
                         "Prot LocalAddress:Port Scheduler Flags\n");
                seq_puts(seq,
@@ -1862,14 +2000,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
                                           svc->scheduler->name);
                        else
 #endif
-                               seq_printf(seq, "%s  %08X:%04X %s ",
+                               seq_printf(seq, "%s  %08X:%04X %s %s ",
                                           ip_vs_proto_name(svc->protocol),
                                           ntohl(svc->addr.ip),
                                           ntohs(svc->port),
-                                          svc->scheduler->name);
+                                          svc->scheduler->name,
+                                          (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
                } else {
-                       seq_printf(seq, "FWM  %08X %s ",
-                                  svc->fwmark, svc->scheduler->name);
+                       seq_printf(seq, "FWM  %08X %s %s",
+                                  svc->fwmark, svc->scheduler->name,
+                                  (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
                }
 
                if (svc->flags & IP_VS_SVC_F_PERSISTENT)
@@ -1917,7 +2057,7 @@ static const struct seq_operations ip_vs_info_seq_ops = {
 
 static int ip_vs_info_open(struct inode *inode, struct file *file)
 {
-       return seq_open_private(file, &ip_vs_info_seq_ops,
+       return seq_open_net(inode, file, &ip_vs_info_seq_ops,
                        sizeof(struct ip_vs_iter));
 }
 
@@ -1926,18 +2066,16 @@ static const struct file_operations ip_vs_info_fops = {
        .open    = ip_vs_info_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
-       .release = seq_release_private,
+       .release = seq_release_net,
 };
 
 #endif
 
-struct ip_vs_stats ip_vs_stats = {
-       .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
-};
-
 #ifdef CONFIG_PROC_FS
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
+       struct net *net = seq_file_single_net(seq);
+       struct ip_vs_stats_user show;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
@@ -1945,29 +2083,25 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
        seq_printf(seq,
                   "   Conns  Packets  Packets            Bytes            Bytes\n");
 
-       spin_lock_bh(&ip_vs_stats.lock);
-       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
-                  ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
-                  (unsigned long long) ip_vs_stats.ustats.inbytes,
-                  (unsigned long long) ip_vs_stats.ustats.outbytes);
+       ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
+       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
+                  show.inpkts, show.outpkts,
+                  (unsigned long long) show.inbytes,
+                  (unsigned long long) show.outbytes);
 
 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
                   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
-       seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-                       ip_vs_stats.ustats.cps,
-                       ip_vs_stats.ustats.inpps,
-                       ip_vs_stats.ustats.outpps,
-                       ip_vs_stats.ustats.inbps,
-                       ip_vs_stats.ustats.outbps);
-       spin_unlock_bh(&ip_vs_stats.lock);
+       seq_printf(seq, "%8X %8X %8X %16X %16X\n",
+                       show.cps, show.inpps, show.outpps,
+                       show.inbps, show.outbps);
 
        return 0;
 }
 
 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
 {
-       return single_open(file, ip_vs_stats_show, NULL);
+       return single_open_net(inode, file, ip_vs_stats_show);
 }
 
 static const struct file_operations ip_vs_stats_fops = {
@@ -1975,16 +2109,88 @@ static const struct file_operations ip_vs_stats_fops = {
        .open = ip_vs_stats_seq_open,
        .read = seq_read,
        .llseek = seq_lseek,
-       .release = single_release,
+       .release = single_release_net,
 };
 
+static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
+{
+       struct net *net = seq_file_single_net(seq);
+       struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
+       struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
+       struct ip_vs_stats_user rates;
+       int i;
+       /* Prints one counter row per possible CPU, a "~" totals row, then the rates; returns 0 */
+/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
+       seq_puts(seq,
+                "       Total Incoming Outgoing         Incoming         Outgoing\n");
+       seq_puts(seq,
+                "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
+
+       for_each_possible_cpu(i) {
+               struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
+               unsigned int start;
+               __u64 inbytes, outbytes;
+               /* Re-read both 64-bit byte counters until the syncp retry check passes */
+               do {
+                       start = u64_stats_fetch_begin_bh(&u->syncp);
+                       inbytes = u->ustats.inbytes;
+                       outbytes = u->ustats.outbytes;
+               } while (u64_stats_fetch_retry_bh(&u->syncp, start));
+
+               seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
+                          i, u->ustats.conns, u->ustats.inpkts,
+                          u->ustats.outpkts, (unsigned long long)inbytes,
+                          (unsigned long long)outbytes);
+       }
+
+       spin_lock_bh(&tot_stats->lock);
+
+       seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
+                  tot_stats->ustats.conns, tot_stats->ustats.inpkts,
+                  tot_stats->ustats.outpkts,
+                  (unsigned long long) tot_stats->ustats.inbytes,
+                  (unsigned long long) tot_stats->ustats.outbytes);
+       /* Take the rate snapshot while tot_stats->lock is still held */
+       ip_vs_read_estimator(&rates, tot_stats);
+
+       spin_unlock_bh(&tot_stats->lock);
+
+/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+       seq_puts(seq,
+                  "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+       seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
+                       rates.cps,
+                       rates.inpps,
+                       rates.outpps,
+                       rates.inbps,
+                       rates.outbps);
+
+       return 0;
+}
+
+static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
+{
+       return single_open_net(inode, file, ip_vs_stats_percpu_show); /* show() gets its netns via seq_file_single_net() */
+}
+
+static const struct file_operations ip_vs_stats_percpu_fops = {
+       .owner = THIS_MODULE,
+       .open = ip_vs_stats_percpu_seq_open,    /* calls single_open_net() */
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release_net,          /* presumably the release side of single_open_net() - confirm */
+};
 #endif
 
 /*
  *     Set timeout values for tcp tcpfin udp in the timeout_table.
  */
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
 {
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
+       struct ip_vs_proto_data *pd;
+#endif
+
        IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
                  u->tcp_timeout,
                  u->tcp_fin_timeout,
@@ -1992,19 +2198,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
 
 #ifdef CONFIG_IP_VS_PROTO_TCP
        if (u->tcp_timeout) {
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+               pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+               pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
                        = u->tcp_timeout * HZ;
        }
 
        if (u->tcp_fin_timeout) {
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+               pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+               pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
                        = u->tcp_fin_timeout * HZ;
        }
 #endif
 
 #ifdef CONFIG_IP_VS_PROTO_UDP
        if (u->udp_timeout) {
-               ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+               pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+               pd->timeout_table[IP_VS_UDP_S_NORMAL]
                        = u->udp_timeout * HZ;
        }
 #endif
@@ -2037,6 +2246,8 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
                                  struct ip_vs_service_user *usvc_compat)
 {
+       memset(usvc, 0, sizeof(*usvc));
+
        usvc->af                = AF_INET;
        usvc->protocol          = usvc_compat->protocol;
        usvc->addr.ip           = usvc_compat->addr;
@@ -2054,6 +2265,8 @@ static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
                                   struct ip_vs_dest_user *udest_compat)
 {
+       memset(udest, 0, sizeof(*udest));
+
        udest->addr.ip          = udest_compat->addr;
        udest->port             = udest_compat->port;
        udest->conn_flags       = udest_compat->conn_flags;
@@ -2065,6 +2278,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
 static int
 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
+       struct net *net = sock_net(sk);
        int ret;
        unsigned char arg[MAX_ARG_LEN];
        struct ip_vs_service_user *usvc_compat;
@@ -2076,9 +2290,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
        if (!capable(CAP_NET_ADMIN))
                return -EPERM;
 
+       if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
+               return -EINVAL;
+       if (len < 0 || len >  MAX_ARG_LEN)
+               return -EINVAL;
        if (len != set_arglen[SET_CMDID(cmd)]) {
-               IP_VS_ERR("set_ctl: len %u != %u\n",
-                         len, set_arglen[SET_CMDID(cmd)]);
+               pr_err("set_ctl: len %u != %u\n",
+                      len, set_arglen[SET_CMDID(cmd)]);
                return -EINVAL;
        }
 
@@ -2095,19 +2313,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
        if (cmd == IP_VS_SO_SET_FLUSH) {
                /* Flush the virtual service */
-               ret = ip_vs_flush();
+               ret = ip_vs_flush(net);
                goto out_unlock;
        } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
                /* Set timeout values for (tcp tcpfin udp) */
-               ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+               ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
                goto out_unlock;
        } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-               ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+               ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
+                                       dm->syncid);
                goto out_unlock;
        } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-               ret = stop_sync_thread(dm->state);
+               ret = stop_sync_thread(net, dm->state);
                goto out_unlock;
        }
 
@@ -2122,26 +2341,27 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
        if (cmd == IP_VS_SO_SET_ZERO) {
                /* if no service address is set, zero counters in all */
                if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
-                       ret = ip_vs_zero_all();
+                       ret = ip_vs_zero_all(net);
                        goto out_unlock;
                }
        }
 
-       /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
-       if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
-               IP_VS_ERR("set_ctl: invalid protocol: %d %pI4:%d %s\n",
-                         usvc.protocol, &usvc.addr.ip,
-                         ntohs(usvc.port), usvc.sched_name);
+       /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
+       if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
+           usvc.protocol != IPPROTO_SCTP) {
+               pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
+                      usvc.protocol, &usvc.addr.ip,
+                      ntohs(usvc.port), usvc.sched_name);
                ret = -EFAULT;
                goto out_unlock;
        }
 
        /* Lookup the exact service by <protocol, addr, port> or fwmark */
        if (usvc.fwmark == 0)
-               svc = __ip_vs_service_get(usvc.af, usvc.protocol,
-                                         &usvc.addr, usvc.port);
+               svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
+                                          &usvc.addr, usvc.port);
        else
-               svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
+               svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
 
        if (cmd != IP_VS_SO_SET_ADD
            && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2154,7 +2374,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
                if (svc != NULL)
                        ret = -EEXIST;
                else
-                       ret = ip_vs_add_service(&usvc, &svc);
+                       ret = ip_vs_add_service(net, &usvc, &svc);
                break;
        case IP_VS_SO_SET_EDIT:
                ret = ip_vs_edit_service(svc, &usvc);
@@ -2180,9 +2400,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
                ret = -EINVAL;
        }
 
-       if (svc)
-               ip_vs_service_put(svc);
-
   out_unlock:
        mutex_unlock(&__ip_vs_mutex);
   out_dec:
@@ -2194,14 +2411,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
 
 static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
-{
-       spin_lock_bh(&src->lock);
-       memcpy(dst, &src->ustats, sizeof(*dst));
-       spin_unlock_bh(&src->lock);
-}
-
-static void
 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 {
        dst->protocol = src->protocol;
@@ -2217,7 +2426,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 }
 
 static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+__ip_vs_get_service_entries(struct net *net,
+                           const struct ip_vs_get_services *get,
                            struct ip_vs_get_services __user *uptr)
 {
        int idx, count=0;
@@ -2228,7 +2438,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
                        /* Only expose IPv4 entries to old interface */
-                       if (svc->af != AF_INET)
+                       if (svc->af != AF_INET || !net_eq(svc->net, net))
                                continue;
 
                        if (count >= get->num_services)
@@ -2247,7 +2457,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
                        /* Only expose IPv4 entries to old interface */
-                       if (svc->af != AF_INET)
+                       if (svc->af != AF_INET || !net_eq(svc->net, net))
                                continue;
 
                        if (count >= get->num_services)
@@ -2267,7 +2477,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 }
 
 static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
                         struct ip_vs_get_dests __user *uptr)
 {
        struct ip_vs_service *svc;
@@ -2275,10 +2485,10 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
        int ret = 0;
 
        if (get->fwmark)
-               svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
+               svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
        else
-               svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
-                                         get->port);
+               svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
+                                          get->port);
 
        if (svc) {
                int count = 0;
@@ -2306,24 +2516,27 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
                        }
                        count++;
                }
-               ip_vs_service_put(svc);
        } else
                ret = -ESRCH;
        return ret;
 }
 
 static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
 {
+#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
+       struct ip_vs_proto_data *pd;
+#endif
+
 #ifdef CONFIG_IP_VS_PROTO_TCP
-       u->tcp_timeout =
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
-       u->tcp_fin_timeout =
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+       pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+       u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+       u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
 #endif
 #ifdef CONFIG_IP_VS_PROTO_UDP
+       pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
        u->udp_timeout =
-               ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+                       pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
 #endif
 }
 
@@ -2351,17 +2564,28 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 {
        unsigned char arg[128];
        int ret = 0;
+       unsigned int copylen;
+       struct net *net = sock_net(sk);
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
+       BUG_ON(!net);
        if (!capable(CAP_NET_ADMIN))
                return -EPERM;
 
+       if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
+               return -EINVAL;
+
        if (*len < get_arglen[GET_CMDID(cmd)]) {
-               IP_VS_ERR("get_ctl: len %u < %u\n",
-                         *len, get_arglen[GET_CMDID(cmd)]);
+               pr_err("get_ctl: len %u < %u\n",
+                      *len, get_arglen[GET_CMDID(cmd)]);
                return -EINVAL;
        }
 
-       if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
+       copylen = get_arglen[GET_CMDID(cmd)];
+       if (copylen > 128)
+               return -EINVAL;
+
+       if (copy_from_user(arg, user, copylen) != 0)
                return -EFAULT;
 
        if (mutex_lock_interruptible(&__ip_vs_mutex))
@@ -2373,7 +2597,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                char buf[64];
 
                sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
-                       NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+                       NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
                if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
                        ret = -EFAULT;
                        goto out;
@@ -2386,8 +2610,8 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
        {
                struct ip_vs_getinfo info;
                info.version = IP_VS_VERSION_CODE;
-               info.size = IP_VS_CONN_TAB_SIZE;
-               info.num_services = ip_vs_num_services;
+               info.size = ip_vs_conn_tab_size;
+               info.num_services = ipvs->num_services;
                if (copy_to_user(user, &info, sizeof(info)) != 0)
                        ret = -EFAULT;
        }
@@ -2402,11 +2626,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                size = sizeof(*get) +
                        sizeof(struct ip_vs_service_entry) * get->num_services;
                if (*len != size) {
-                       IP_VS_ERR("length: %u != %u\n", *len, size);
+                       pr_err("length: %u != %u\n", *len, size);
                        ret = -EINVAL;
                        goto out;
                }
-               ret = __ip_vs_get_service_entries(get, user);
+               ret = __ip_vs_get_service_entries(net, get, user);
        }
        break;
 
@@ -2419,15 +2643,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                entry = (struct ip_vs_service_entry *)arg;
                addr.ip = entry->addr;
                if (entry->fwmark)
-                       svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
+                       svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
                else
-                       svc = __ip_vs_service_get(AF_INET, entry->protocol,
-                                                 &addr, entry->port);
+                       svc = __ip_vs_service_find(net, AF_INET,
+                                                  entry->protocol, &addr,
+                                                  entry->port);
                if (svc) {
                        ip_vs_copy_service(entry, svc);
                        if (copy_to_user(user, entry, sizeof(*entry)) != 0)
                                ret = -EFAULT;
-                       ip_vs_service_put(svc);
                } else
                        ret = -ESRCH;
        }
@@ -2442,11 +2666,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                size = sizeof(*get) +
                        sizeof(struct ip_vs_dest_entry) * get->num_dests;
                if (*len != size) {
-                       IP_VS_ERR("length: %u != %u\n", *len, size);
+                       pr_err("length: %u != %u\n", *len, size);
                        ret = -EINVAL;
                        goto out;
                }
-               ret = __ip_vs_get_dest_entries(get, user);
+               ret = __ip_vs_get_dest_entries(net, get, user);
        }
        break;
 
@@ -2454,7 +2678,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
        {
                struct ip_vs_timeout_user t;
 
-               __ip_vs_get_timeouts(&t);
+               __ip_vs_get_timeouts(net, &t);
                if (copy_to_user(user, &t, sizeof(t)) != 0)
                        ret = -EFAULT;
        }
@@ -2465,15 +2689,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                struct ip_vs_daemon_user d[2];
 
                memset(&d, 0, sizeof(d));
-               if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+               if (ipvs->sync_state & IP_VS_STATE_MASTER) {
                        d[0].state = IP_VS_STATE_MASTER;
-                       strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
-                       d[0].syncid = ip_vs_master_syncid;
+                       strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+                               sizeof(d[0].mcast_ifn));
+                       d[0].syncid = ipvs->master_syncid;
                }
-               if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+               if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
                        d[1].state = IP_VS_STATE_BACKUP;
-                       strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
-                       d[1].syncid = ip_vs_backup_syncid;
+                       strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+                               sizeof(d[1].mcast_ifn));
+                       d[1].syncid = ipvs->backup_syncid;
                }
                if (copy_to_user(user, &d, sizeof(d)) != 0)
                        ret = -EFAULT;
@@ -2512,6 +2738,7 @@ static struct genl_family ip_vs_genl_family = {
        .name           = IPVS_GENL_NAME,
        .version        = IPVS_GENL_VERSION,
        .maxattr        = IPVS_CMD_MAX,
+       .netnsok        = true,         /* Let ipvsadm work inside network namespaces */
 };
 
 /* Policy used for first-level command attributes */
@@ -2542,6 +2769,8 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
        [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
        [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
                                            .len = IP_VS_SCHEDNAME_MAXLEN },
+       [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
+                                           .len = IP_VS_PENAME_MAXLEN },
        [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
                                            .len = sizeof(struct ip_vs_flags) },
        [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
@@ -2567,31 +2796,29 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
                                 struct ip_vs_stats *stats)
 {
+       struct ip_vs_stats_user ustats; /* local copy filled by ip_vs_copy_stats() below */
        struct nlattr *nl_stats = nla_nest_start(skb, container_type);
        if (!nl_stats)
                return -EMSGSIZE;
 
-       spin_lock_bh(&stats->lock);
-
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
-       NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
-       NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
+       ip_vs_copy_stats(&ustats, stats);       /* this function no longer holds stats->lock across NLA_PUT_* */
 
-       spin_unlock_bh(&stats->lock);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
+       NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
+       NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
 
        nla_nest_end(skb, nl_stats);
 
        return 0;
 
 nla_put_failure:
-       spin_unlock_bh(&stats->lock);
        nla_nest_cancel(skb, nl_stats);
        return -EMSGSIZE;
 }
@@ -2618,6 +2845,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
        }
 
        NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
+       if (svc->pe)
+               NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
        NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
        NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
        NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
@@ -2662,11 +2891,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
        int idx = 0, i;
        int start = cb->args[0];
        struct ip_vs_service *svc;
+       struct net *net = skb_sknet(skb);
 
        mutex_lock(&__ip_vs_mutex);
        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
                list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
-                       if (++idx <= start)
+                       if (++idx <= start || !net_eq(svc->net, net))
                                continue;
                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
                                idx--;
@@ -2677,7 +2907,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
 
        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
-                       if (++idx <= start)
+                       if (++idx <= start || !net_eq(svc->net, net))
                                continue;
                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
                                idx--;
@@ -2693,11 +2923,14 @@ nla_put_failure:
        return skb->len;
 }
 
-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
-                                   struct nlattr *nla, int full_entry)
+static int ip_vs_genl_parse_service(struct net *net,
+                                   struct ip_vs_service_user_kern *usvc,
+                                   struct nlattr *nla, int full_entry,
+                                   struct ip_vs_service **ret_svc)
 {
        struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
        struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+       struct ip_vs_service *svc;
 
        /* Parse mandatory identifying service fields first */
        if (nla == NULL ||
@@ -2713,6 +2946,8 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
        if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
                return -EINVAL;
 
+       memset(usvc, 0, sizeof(*usvc));
+
        usvc->af = nla_get_u16(nla_af);
 #ifdef CONFIG_IP_VS_IPV6
        if (usvc->af != AF_INET && usvc->af != AF_INET6)
@@ -2731,14 +2966,21 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
                usvc->fwmark = 0;
        }
 
+       if (usvc->fwmark)
+               svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
+       else
+               svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
+                                          &usvc->addr, usvc->port);
+       *ret_svc = svc;
+
        /* If a full entry was requested, check for the additional fields */
        if (full_entry) {
-               struct nlattr *nla_sched, *nla_flags, *nla_timeout,
+               struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
                              *nla_netmask;
                struct ip_vs_flags flags;
-               struct ip_vs_service *svc;
 
                nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
+               nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
                nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
                nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
                nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
@@ -2749,21 +2991,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
                nla_memcpy(&flags, nla_flags, sizeof(flags));
 
                /* prefill flags from service if it already exists */
-               if (usvc->fwmark)
-                       svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
-               else
-                       svc = __ip_vs_service_get(usvc->af, usvc->protocol,
-                                                 &usvc->addr, usvc->port);
-               if (svc) {
+               if (svc)
                        usvc->flags = svc->flags;
-                       ip_vs_service_put(svc);
-               } else
-                       usvc->flags = 0;
 
                /* set new flags from userland */
                usvc->flags = (usvc->flags & ~flags.mask) |
                              (flags.flags & flags.mask);
                usvc->sched_name = nla_data(nla_sched);
+               usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
                usvc->timeout = nla_get_u32(nla_timeout);
                usvc->netmask = nla_get_u32(nla_netmask);
        }
@@ -2771,20 +3006,15 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
        return 0;
 }
 
-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+                                                    struct nlattr *nla)
 {
        struct ip_vs_service_user_kern usvc;
+       struct ip_vs_service *svc;
        int ret;
 
-       ret = ip_vs_genl_parse_service(&usvc, nla, 0);
-       if (ret)
-               return ERR_PTR(ret);
-
-       if (usvc.fwmark)
-               return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
-       else
-               return __ip_vs_service_get(usvc.af, usvc.protocol,
-                                          &usvc.addr, usvc.port);
+       ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
+       return ret ? ERR_PTR(ret) : svc;
 }
 
 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
@@ -2851,6 +3081,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
        struct ip_vs_service *svc;
        struct ip_vs_dest *dest;
        struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+       struct net *net = skb_sknet(skb);
 
        mutex_lock(&__ip_vs_mutex);
 
@@ -2859,7 +3090,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
                        IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
                goto out_err;
 
-       svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+
+       svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
        if (IS_ERR(svc) || svc == NULL)
                goto out_err;
 
@@ -2875,7 +3107,6 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
 
 nla_put_failure:
        cb->args[0] = idx;
-       ip_vs_service_put(svc);
 
 out_err:
        mutex_unlock(&__ip_vs_mutex);
@@ -2900,6 +3131,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
        if (!(nla_addr && nla_port))
                return -EINVAL;
 
+       memset(udest, 0, sizeof(*udest));
+
        nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
        udest->port = nla_get_u16(nla_port);
 
@@ -2972,20 +3205,23 @@ nla_put_failure:
 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
                                   struct netlink_callback *cb)
 {
+       struct net *net = skb_sknet(skb);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
        mutex_lock(&__ip_vs_mutex);
-       if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+       if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
-                                          ip_vs_master_mcast_ifn,
-                                          ip_vs_master_syncid, cb) < 0)
+                                          ipvs->master_mcast_ifn,
+                                          ipvs->master_syncid, cb) < 0)
                        goto nla_put_failure;
 
                cb->args[0] = 1;
        }
 
-       if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+       if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
-                                          ip_vs_backup_mcast_ifn,
-                                          ip_vs_backup_syncid, cb) < 0)
+                                          ipvs->backup_mcast_ifn,
+                                          ipvs->backup_syncid, cb) < 0)
                        goto nla_put_failure;
 
                cb->args[1] = 1;
@@ -2997,31 +3233,33 @@ nla_put_failure:
        return skb->len;
 }
 
-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
 {
        if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
              attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
              attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
                return -EINVAL;
 
-       return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+       return start_sync_thread(net,
+                                nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
                                 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
 }
 
-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
 {
        if (!attrs[IPVS_DAEMON_ATTR_STATE])
                return -EINVAL;
 
-       return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+       return stop_sync_thread(net,
+                               nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
 }
 
-static int ip_vs_genl_set_config(struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
 {
        struct ip_vs_timeout_user t;
 
-       __ip_vs_get_timeouts(&t);
+       __ip_vs_get_timeouts(net, &t);
 
        if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
                t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3033,7 +3271,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
        if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
                t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
 
-       return ip_vs_set_timeout(&t);
+       return ip_vs_set_timeout(net, &t);
 }
 
 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3043,16 +3281,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
        struct ip_vs_dest_user_kern udest;
        int ret = 0, cmd;
        int need_full_svc = 0, need_full_dest = 0;
+       struct net *net;
+       struct netns_ipvs *ipvs;
 
+       net = skb_sknet(skb);
+       ipvs = net_ipvs(net);
        cmd = info->genlhdr->cmd;
 
        mutex_lock(&__ip_vs_mutex);
 
        if (cmd == IPVS_CMD_FLUSH) {
-               ret = ip_vs_flush();
+               ret = ip_vs_flush(net);
                goto out;
        } else if (cmd == IPVS_CMD_SET_CONFIG) {
-               ret = ip_vs_genl_set_config(info->attrs);
+               ret = ip_vs_genl_set_config(net, info->attrs);
                goto out;
        } else if (cmd == IPVS_CMD_NEW_DAEMON ||
                   cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3068,13 +3310,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
                }
 
                if (cmd == IPVS_CMD_NEW_DAEMON)
-                       ret = ip_vs_genl_new_daemon(daemon_attrs);
+                       ret = ip_vs_genl_new_daemon(net, daemon_attrs);
                else
-                       ret = ip_vs_genl_del_daemon(daemon_attrs);
+                       ret = ip_vs_genl_del_daemon(net, daemon_attrs);
                goto out;
        } else if (cmd == IPVS_CMD_ZERO &&
                   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
-               ret = ip_vs_zero_all();
+               ret = ip_vs_zero_all(net);
                goto out;
        }
 
@@ -3084,19 +3326,12 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
        if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
                need_full_svc = 1;
 
-       ret = ip_vs_genl_parse_service(&usvc,
+       ret = ip_vs_genl_parse_service(net, &usvc,
                                       info->attrs[IPVS_CMD_ATTR_SERVICE],
-                                      need_full_svc);
+                                      need_full_svc, &svc);
        if (ret)
                goto out;
 
-       /* Lookup the exact service by <protocol, addr, port> or fwmark */
-       if (usvc.fwmark == 0)
-               svc = __ip_vs_service_get(usvc.af, usvc.protocol,
-                                         &usvc.addr, usvc.port);
-       else
-               svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
-
        /* Unless we're adding a new service, the service must already exist */
        if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
                ret = -ESRCH;
@@ -3121,7 +3356,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
        switch (cmd) {
        case IPVS_CMD_NEW_SERVICE:
                if (svc == NULL)
-                       ret = ip_vs_add_service(&usvc, &svc);
+                       ret = ip_vs_add_service(net, &usvc, &svc);
                else
                        ret = -EEXIST;
                break;
@@ -3130,6 +3365,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
                break;
        case IPVS_CMD_DEL_SERVICE:
                ret = ip_vs_del_service(svc);
+               /* do not use svc, it can be freed */
                break;
        case IPVS_CMD_NEW_DEST:
                ret = ip_vs_add_dest(svc, &udest);
@@ -3148,8 +3384,6 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
        }
 
 out:
-       if (svc)
-               ip_vs_service_put(svc);
        mutex_unlock(&__ip_vs_mutex);
 
        return ret;
@@ -3160,7 +3394,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
        struct sk_buff *msg;
        void *reply;
        int ret, cmd, reply_cmd;
+       struct net *net;
+       struct netns_ipvs *ipvs;
 
+       net = skb_sknet(skb);
+       ipvs = net_ipvs(net);
        cmd = info->genlhdr->cmd;
 
        if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3170,7 +3408,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
        else if (cmd == IPVS_CMD_GET_CONFIG)
                reply_cmd = IPVS_CMD_SET_CONFIG;
        else {
-               IP_VS_ERR("unknown Generic Netlink command\n");
+               pr_err("unknown Generic Netlink command\n");
                return -EINVAL;
        }
 
@@ -3189,13 +3427,13 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
        {
                struct ip_vs_service *svc;
 
-               svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+               svc = ip_vs_genl_find_service(net,
+                                             info->attrs[IPVS_CMD_ATTR_SERVICE]);
                if (IS_ERR(svc)) {
                        ret = PTR_ERR(svc);
                        goto out_err;
                } else if (svc) {
                        ret = ip_vs_genl_fill_service(msg, svc);
-                       ip_vs_service_put(svc);
                        if (ret)
                                goto nla_put_failure;
                } else {
@@ -3210,7 +3448,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
        {
                struct ip_vs_timeout_user t;
 
-               __ip_vs_get_timeouts(&t);
+               __ip_vs_get_timeouts(net, &t);
 #ifdef CONFIG_IP_VS_PROTO_TCP
                NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
                NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
@@ -3226,16 +3464,16 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
        case IPVS_CMD_GET_INFO:
                NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
                NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
-                           IP_VS_CONN_TAB_SIZE);
+                           ip_vs_conn_tab_size);
                break;
        }
 
        genlmsg_end(msg, reply);
-       ret = genlmsg_unicast(msg, info->snd_pid);
+       ret = genlmsg_reply(msg, info);
        goto out;
 
 nla_put_failure:
-       IP_VS_ERR("not enough space in Netlink message\n");
+       pr_err("not enough space in Netlink message\n");
        ret = -EMSGSIZE;
 
 out_err:
@@ -3345,86 +3583,197 @@ static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
 
 static int __init ip_vs_genl_register(void)
 {
-       int ret, i;
+       return genl_register_family_with_ops(&ip_vs_genl_family,
+               ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
+}
 
-       ret = genl_register_family(&ip_vs_genl_family);
-       if (ret)
-               return ret;
+static void ip_vs_genl_unregister(void)
+{
+       genl_unregister_family(&ip_vs_genl_family);
+}
 
-       for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
-               ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
-               if (ret)
-                       goto err_out;
+/* End of Generic Netlink interface definitions */
+
+/*
+ * per netns init/exit func.
+ */
+#ifdef CONFIG_SYSCTL
+int __net_init __ip_vs_control_init_sysctl(struct net *net)
+{
+       int idx;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ctl_table *tbl;
+
+       atomic_set(&ipvs->dropentry, 0);
+       spin_lock_init(&ipvs->dropentry_lock);
+       spin_lock_init(&ipvs->droppacket_lock);
+       spin_lock_init(&ipvs->securetcp_lock);
+
+       if (!net_eq(net, &init_net)) {
+               tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+               if (tbl == NULL)
+                       return -ENOMEM;
+       } else
+               tbl = vs_vars;
+       /* Initialize sysctl defaults */
+       idx = 0;
+       ipvs->sysctl_amemthresh = 1024;
+       tbl[idx++].data = &ipvs->sysctl_amemthresh;
+       ipvs->sysctl_am_droprate = 10;
+       tbl[idx++].data = &ipvs->sysctl_am_droprate;
+       tbl[idx++].data = &ipvs->sysctl_drop_entry;
+       tbl[idx++].data = &ipvs->sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+       tbl[idx++].data = &ipvs->sysctl_conntrack;
+#endif
+       tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+       ipvs->sysctl_snat_reroute = 1;
+       tbl[idx++].data = &ipvs->sysctl_snat_reroute;
+       ipvs->sysctl_sync_ver = 1;
+       tbl[idx++].data = &ipvs->sysctl_sync_ver;
+       tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+       tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+       tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+       ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
+       ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
+       tbl[idx].data = &ipvs->sysctl_sync_threshold;
+       tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+       tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+
+
+       ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
+                                                    tbl);
+       if (ipvs->sysctl_hdr == NULL) {
+               if (!net_eq(net, &init_net))
+                       kfree(tbl);
+               return -ENOMEM;
        }
-       return 0;
+       ip_vs_start_estimator(net, &ipvs->tot_stats);
+       ipvs->sysctl_tbl = tbl;
+       /* Schedule defense work */
+       INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
+       schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 
-err_out:
-       genl_unregister_family(&ip_vs_genl_family);
-       return ret;
+       return 0;
 }
 
-static void ip_vs_genl_unregister(void)
+void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net)
 {
-       genl_unregister_family(&ip_vs_genl_family);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       cancel_delayed_work_sync(&ipvs->defense_work);
+       cancel_work_sync(&ipvs->defense_work.work);
+       unregister_net_sysctl_table(ipvs->sysctl_hdr);
 }
 
-/* End of Generic Netlink interface definitions */
+#else
 
+int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
+void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net) { }
 
-int __init ip_vs_control_init(void)
+#endif
+
+static struct notifier_block ip_vs_dst_notifier = {
+       .notifier_call = ip_vs_dst_event,
+};
+
+int __net_init __ip_vs_control_init(struct net *net)
 {
-       int ret;
        int idx;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
-       EnterFunction(2);
+       ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
 
-       ret = nf_register_sockopt(&ip_vs_sockopts);
-       if (ret) {
-               IP_VS_ERR("cannot register sockopt.\n");
-               return ret;
-       }
+       /* Initialize rs_table */
+       for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+               INIT_LIST_HEAD(&ipvs->rs_table[idx]);
 
-       ret = ip_vs_genl_register();
-       if (ret) {
-               IP_VS_ERR("cannot register Generic Netlink interface.\n");
-               nf_unregister_sockopt(&ip_vs_sockopts);
-               return ret;
+       INIT_LIST_HEAD(&ipvs->dest_trash);
+       atomic_set(&ipvs->ftpsvc_counter, 0);
+       atomic_set(&ipvs->nullsvc_counter, 0);
+
+       /* procfs stats */
+       ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!ipvs->tot_stats.cpustats) {
+               pr_err("%s(): alloc_percpu.\n", __func__);
+               return -ENOMEM;
        }
+       spin_lock_init(&ipvs->tot_stats.lock);
+
+       proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+       proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+       proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+                            &ip_vs_stats_percpu_fops);
+
+       if (__ip_vs_control_init_sysctl(net))
+               goto err;
+
+       return 0;
+
+err:
+       free_percpu(ipvs->tot_stats.cpustats);
+       return -ENOMEM;
+}
 
-       proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
-       proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
+void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ip_vs_trash_cleanup(net);
+       ip_vs_stop_estimator(net, &ipvs->tot_stats);
+       __ip_vs_control_cleanup_sysctl(net);
+       proc_net_remove(net, "ip_vs_stats_percpu");
+       proc_net_remove(net, "ip_vs_stats");
+       proc_net_remove(net, "ip_vs");
+       free_percpu(ipvs->tot_stats.cpustats);
+}
 
-       sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
+int __init ip_vs_control_init(void)
+{
+       int idx;
+       int ret;
 
-       /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
+       EnterFunction(2);
+
+       /* Initialize ip_vs_svc_table and ip_vs_svc_fwm_table */
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
                INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
                INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
        }
-       for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
-               INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+
+       smp_wmb();      /* Do we really need it now ? */
+
+       ret = nf_register_sockopt(&ip_vs_sockopts);
+       if (ret) {
+               pr_err("cannot register sockopt.\n");
+               goto err_sock;
        }
 
-       ip_vs_new_estimator(&ip_vs_stats);
+       ret = ip_vs_genl_register();
+       if (ret) {
+               pr_err("cannot register Generic Netlink interface.\n");
+               goto err_genl;
+       }
 
-       /* Hook the defense timer */
-       schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+       ret = register_netdevice_notifier(&ip_vs_dst_notifier);
+       if (ret < 0)
+               goto err_notf;
 
        LeaveFunction(2);
        return 0;
+
+err_notf:
+       ip_vs_genl_unregister();
+err_genl:
+       nf_unregister_sockopt(&ip_vs_sockopts);
+err_sock:
+       return ret;
 }
 
 
 void ip_vs_control_cleanup(void)
 {
        EnterFunction(2);
-       ip_vs_trash_cleanup();
-       cancel_rearming_delayed_work(&defense_work);
-       cancel_work_sync(&defense_work.work);
-       ip_vs_kill_estimator(&ip_vs_stats);
-       unregister_sysctl_table(sysctl_header);
-       proc_net_remove(&init_net, "ip_vs_stats");
-       proc_net_remove(&init_net, "ip_vs");
        ip_vs_genl_unregister();
        nf_unregister_sockopt(&ip_vs_sockopts);
        LeaveFunction(2);