Merge branch 'core-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
index 68b8033..37890f2 100644 (file)
@@ -69,25 +69,30 @@ int ip_vs_get_debug_level(void)
 }
 #endif
 
+
+/* Forward declarations */
+static void __ip_vs_del_service(struct ip_vs_service *svc);
+
+
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
 static int __ip_vs_addr_is_local_v6(struct net *net,
                                    const struct in6_addr *addr)
 {
        struct rt6_info *rt;
-       struct flowi fl = {
-               .oif = 0,
-               .fl6_dst = *addr,
-               .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
+       struct flowi6 fl6 = {
+               .daddr = *addr,
        };
 
-       rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
+       rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
        if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
-                       return 1;
+               return 1;
 
        return 0;
 }
 #endif
+
+#ifdef CONFIG_SYSCTL
 /*
  *     update_defense_level is called from keventd and from sysctl,
  *     so it needs to protect itself from softirqs
@@ -229,6 +234,7 @@ static void defense_work_handler(struct work_struct *work)
                ip_vs_random_dropentry(ipvs->net);
        schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 }
+#endif
 
 int
 ip_vs_use_count_inc(void)
@@ -411,9 +417,11 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
        /*
         *      Check the table hashed by fwmark first
         */
-       svc = __ip_vs_svc_fwm_find(net, af, fwmark);
-       if (fwmark && svc)
-               goto out;
+       if (fwmark) {
+               svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+               if (svc)
+                       goto out;
+       }
 
        /*
         *      Check the table hashed by <protocol,addr,port>
@@ -709,13 +717,39 @@ static void ip_vs_trash_cleanup(struct net *net)
        }
 }
 
+/*
+ * Copy the counters of @src into @dst as seen by the user, i.e. relative
+ * to the zero point kept in src->ustats0 (recorded by ip_vs_zero_stats()),
+ * then let ip_vs_read_estimator() fill in @dst from the estimator of @src
+ * (presumably the rate fields - its definition is not in this file).
+ * Everything is done under src->lock with BHs disabled.
+ */
+static void
+ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+{
+#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
+
+       spin_lock_bh(&src->lock);
+
+       IP_VS_SHOW_STATS_COUNTER(conns);
+       IP_VS_SHOW_STATS_COUNTER(inpkts);
+       IP_VS_SHOW_STATS_COUNTER(outpkts);
+       IP_VS_SHOW_STATS_COUNTER(inbytes);
+       IP_VS_SHOW_STATS_COUNTER(outbytes);
+
+       ip_vs_read_estimator(dst, src);
+
+       spin_unlock_bh(&src->lock);
+
+/* local helper only, do not leak it into the rest of the file */
+#undef IP_VS_SHOW_STATS_COUNTER
+}
 
 static void
 ip_vs_zero_stats(struct ip_vs_stats *stats)
 {
        spin_lock_bh(&stats->lock);
 
-       memset(&stats->ustats, 0, sizeof(stats->ustats));
+       /* get current counters as zero point, rates are zeroed */
+
+#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
+
+       IP_VS_ZERO_STATS_COUNTER(conns);
+       IP_VS_ZERO_STATS_COUNTER(inpkts);
+       IP_VS_ZERO_STATS_COUNTER(outpkts);
+       IP_VS_ZERO_STATS_COUNTER(inbytes);
+       IP_VS_ZERO_STATS_COUNTER(outbytes);
+
        ip_vs_zero_estimator(stats);
 
        spin_unlock_bh(&stats->lock);
@@ -769,12 +803,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
        dest->u_threshold = udest->u_threshold;
        dest->l_threshold = udest->l_threshold;
 
-       spin_lock(&dest->dst_lock);
+       spin_lock_bh(&dest->dst_lock);
        ip_vs_dst_reset(dest);
-       spin_unlock(&dest->dst_lock);
+       spin_unlock_bh(&dest->dst_lock);
 
        if (add)
-               ip_vs_new_estimator(svc->net, &dest->stats);
+               ip_vs_start_estimator(svc->net, &dest->stats);
 
        write_lock_bh(&__ip_vs_svc_lock);
 
@@ -980,7 +1014,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
 {
        struct netns_ipvs *ipvs = net_ipvs(net);
 
-       ip_vs_kill_estimator(net, &dest->stats);
+       ip_vs_stop_estimator(net, &dest->stats);
 
        /*
         *  Remove it from the d-linked list with the real services.
@@ -1173,7 +1207,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
        else if (svc->port == 0)
                atomic_inc(&ipvs->nullsvc_counter);
 
-       ip_vs_new_estimator(net, &svc->stats);
+       ip_vs_start_estimator(net, &svc->stats);
 
        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
@@ -1185,6 +1219,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
        write_unlock_bh(&__ip_vs_svc_lock);
 
        *svc_p = svc;
+       /* Now there is a service - full throttle */
+       ipvs->enable = 1;
        return 0;
 
 
@@ -1325,7 +1361,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
        if (svc->af == AF_INET)
                ipvs->num_services--;
 
-       ip_vs_kill_estimator(svc->net, &svc->stats);
+       ip_vs_stop_estimator(svc->net, &svc->stats);
 
        /* Unbind scheduler */
        old_sched = svc->scheduler;
@@ -1443,6 +1479,84 @@ static int ip_vs_flush(struct net *net)
        return 0;
 }
 
+/*
+ *     Delete the services of netns @net via ip_vs_flush(), under __ip_vs_mutex.
+ *     Called by __ip_vs_cleanup()
+ */
+void __ip_vs_service_cleanup(struct net *net)
+{
+       EnterFunction(2);
+       /* ip_vs_flush() is called with __ip_vs_mutex held */
+       mutex_lock(&__ip_vs_mutex);
+       ip_vs_flush(net);
+       mutex_unlock(&__ip_vs_mutex);
+       LeaveFunction(2);
+}
+/*
+ * Release the dst held in dest->dst_cache if it uses @dev (under dst_lock)
+ */
+static inline void
+__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
+{
+       spin_lock_bh(&dest->dst_lock);
+       if (dest->dst_cache && dest->dst_cache->dev == dev) {
+               IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
+                             dev->name,
+                             IP_VS_DBG_ADDR(dest->af, &dest->addr),
+                             ntohs(dest->port),
+                             atomic_read(&dest->refcnt));
+               ip_vs_dst_reset(dest);
+       }
+       spin_unlock_bh(&dest->dst_lock);
+
+}
+/*
+ * Netdev event receiver
+ * Only NETDEV_UNREGISTER is handled: every destination of the device's
+ * netns (reachable through both service tables or parked in the dest
+ * trash) is handed to __ip_vs_dev_reset() so that a cached dst going
+ * through the unregistering device is released.
+ * Always returns NOTIFY_DONE.
+ */
+static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
+                           void *ptr)
+{
+       struct net_device *dev = ptr;
+       struct net *net = dev_net(dev);
+       struct ip_vs_service *svc;
+       struct ip_vs_dest *dest;
+       unsigned int i;
+
+       if (event != NETDEV_UNREGISTER)
+               return NOTIFY_DONE;
+
+       IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
+       EnterFunction(2);
+       mutex_lock(&__ip_vs_mutex);
+
+       for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+               list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+                       /* services of other namespaces are left alone */
+                       if (!net_eq(svc->net, net))
+                               continue;
+                       list_for_each_entry(dest, &svc->destinations, n_list)
+                               __ip_vs_dev_reset(dest, dev);
+               }
+
+               list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+                       if (!net_eq(svc->net, net))
+                               continue;
+                       list_for_each_entry(dest, &svc->destinations, n_list)
+                               __ip_vs_dev_reset(dest, dev);
+               }
+       }
+
+       /* destinations in the trash may still hold a dst as well */
+       list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list)
+               __ip_vs_dev_reset(dest, dev);
+
+       mutex_unlock(&__ip_vs_mutex);
+       LeaveFunction(2);
+       return NOTIFY_DONE;
+}
 
 /*
  *     Zero counters in a service or all services
@@ -1479,11 +1593,11 @@ static int ip_vs_zero_all(struct net *net)
                }
        }
 
-       ip_vs_zero_stats(net_ipvs(net)->tot_stats);
+       ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
        return 0;
 }
 
-
+#ifdef CONFIG_SYSCTL
 static int
 proc_do_defense_mode(ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1505,7 +1619,6 @@ proc_do_defense_mode(ctl_table *table, int write,
        return rc;
 }
 
-
 static int
 proc_do_sync_threshold(ctl_table *table, int write,
                       void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1739,6 +1852,7 @@ const struct ctl_path net_vs_ctl_path[] = {
        { }
 };
 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
+#endif
 
 #ifdef CONFIG_PROC_FS
 
@@ -1952,7 +2066,7 @@ static const struct file_operations ip_vs_info_fops = {
        .open    = ip_vs_info_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
-       .release = seq_release_private,
+       .release = seq_release_net,
 };
 
 #endif
@@ -1961,7 +2075,7 @@ static const struct file_operations ip_vs_info_fops = {
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
        struct net *net = seq_file_single_net(seq);
-       struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+       struct ip_vs_stats_user show;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
@@ -1969,22 +2083,18 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
        seq_printf(seq,
                   "   Conns  Packets  Packets            Bytes            Bytes\n");
 
-       spin_lock_bh(&tot_stats->lock);
-       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
-                  tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
-                  (unsigned long long) tot_stats->ustats.inbytes,
-                  (unsigned long long) tot_stats->ustats.outbytes);
+       ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
+       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
+                  show.inpkts, show.outpkts,
+                  (unsigned long long) show.inbytes,
+                  (unsigned long long) show.outbytes);
 
 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
                   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
-       seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-                       tot_stats->ustats.cps,
-                       tot_stats->ustats.inpps,
-                       tot_stats->ustats.outpps,
-                       tot_stats->ustats.inbps,
-                       tot_stats->ustats.outbps);
-       spin_unlock_bh(&tot_stats->lock);
+       seq_printf(seq, "%8X %8X %8X %16X %16X\n",
+                       show.cps, show.inpps, show.outpps,
+                       show.inbps, show.outbps);
 
        return 0;
 }
@@ -1999,13 +2109,15 @@ static const struct file_operations ip_vs_stats_fops = {
        .open = ip_vs_stats_seq_open,
        .read = seq_read,
        .llseek = seq_lseek,
-       .release = single_release,
+       .release = single_release_net,
 };
 
 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
 {
        struct net *net = seq_file_single_net(seq);
-       struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+       struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
+       struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
+       struct ip_vs_stats_user rates;
        int i;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
@@ -2015,30 +2127,43 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
                   "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
 
        for_each_possible_cpu(i) {
-               struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
+               struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
+               unsigned int start;
+               __u64 inbytes, outbytes;
+
+               do {
+                       start = u64_stats_fetch_begin_bh(&u->syncp);
+                       inbytes = u->ustats.inbytes;
+                       outbytes = u->ustats.outbytes;
+               } while (u64_stats_fetch_retry_bh(&u->syncp, start));
+
                seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
-                           i, u->ustats.conns, u->ustats.inpkts,
-                           u->ustats.outpkts, (__u64)u->ustats.inbytes,
-                           (__u64)u->ustats.outbytes);
+                          i, u->ustats.conns, u->ustats.inpkts,
+                          u->ustats.outpkts, (__u64)inbytes,
+                          (__u64)outbytes);
        }
 
        spin_lock_bh(&tot_stats->lock);
+
        seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
                   tot_stats->ustats.conns, tot_stats->ustats.inpkts,
                   tot_stats->ustats.outpkts,
                   (unsigned long long) tot_stats->ustats.inbytes,
                   (unsigned long long) tot_stats->ustats.outbytes);
 
+       ip_vs_read_estimator(&rates, tot_stats);
+
+       spin_unlock_bh(&tot_stats->lock);
+
 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
                   "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
        seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
-                       tot_stats->ustats.cps,
-                       tot_stats->ustats.inpps,
-                       tot_stats->ustats.outpps,
-                       tot_stats->ustats.inbps,
-                       tot_stats->ustats.outbps);
-       spin_unlock_bh(&tot_stats->lock);
+                       rates.cps,
+                       rates.inpps,
+                       rates.outpps,
+                       rates.inbps,
+                       rates.outbps);
 
        return 0;
 }
@@ -2053,7 +2178,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = {
        .open = ip_vs_stats_percpu_seq_open,
        .read = seq_read,
        .llseek = seq_lseek,
-       .release = single_release,
+       .release = single_release_net,
 };
 #endif
 
@@ -2286,14 +2411,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
 
 static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
-{
-       spin_lock_bh(&src->lock);
-       memcpy(dst, &src->ustats, sizeof(*dst));
-       spin_unlock_bh(&src->lock);
-}
-
-static void
 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 {
        dst->protocol = src->protocol;
@@ -2679,31 +2796,29 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
                                 struct ip_vs_stats *stats)
 {
+       struct ip_vs_stats_user ustats;
        struct nlattr *nl_stats = nla_nest_start(skb, container_type);
        if (!nl_stats)
                return -EMSGSIZE;
 
-       spin_lock_bh(&stats->lock);
+       ip_vs_copy_stats(&ustats, stats);
 
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
-       NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
-       NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
-       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
-
-       spin_unlock_bh(&stats->lock);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
+       NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
+       NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
+       NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
 
        nla_nest_end(skb, nl_stats);
 
        return 0;
 
 nla_put_failure:
-       spin_unlock_bh(&stats->lock);
        nla_nest_cancel(skb, nl_stats);
        return -EMSGSIZE;
 }
@@ -3090,7 +3205,7 @@ nla_put_failure:
 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
                                   struct netlink_callback *cb)
 {
-       struct net *net = skb_net(skb);
+       struct net *net = skb_sknet(skb);
        struct netns_ipvs *ipvs = net_ipvs(net);
 
        mutex_lock(&__ip_vs_mutex);
@@ -3482,7 +3597,8 @@ static void ip_vs_genl_unregister(void)
 /*
  * per netns intit/exit func.
  */
-int __net_init __ip_vs_control_init(struct net *net)
+#ifdef CONFIG_SYSCTL
+int __net_init __ip_vs_control_init_sysctl(struct net *net)
 {
        int idx;
        struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3492,41 +3608,11 @@ int __net_init __ip_vs_control_init(struct net *net)
        spin_lock_init(&ipvs->dropentry_lock);
        spin_lock_init(&ipvs->droppacket_lock);
        spin_lock_init(&ipvs->securetcp_lock);
-       ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
-
-       /* Initialize rs_table */
-       for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
-               INIT_LIST_HEAD(&ipvs->rs_table[idx]);
-
-       INIT_LIST_HEAD(&ipvs->dest_trash);
-       atomic_set(&ipvs->ftpsvc_counter, 0);
-       atomic_set(&ipvs->nullsvc_counter, 0);
-
-       /* procfs stats */
-       ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
-       if (ipvs->tot_stats == NULL) {
-               pr_err("%s(): no memory.\n", __func__);
-               return -ENOMEM;
-       }
-       ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
-       if (!ipvs->cpustats) {
-               pr_err("%s() alloc_percpu failed\n", __func__);
-               goto err_alloc;
-       }
-       spin_lock_init(&ipvs->tot_stats->lock);
-
-       for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
-               INIT_LIST_HEAD(&ipvs->rs_table[idx]);
-
-       proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
-       proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
-       proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
-                            &ip_vs_stats_percpu_fops);
 
        if (!net_eq(net, &init_net)) {
                tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
                if (tbl == NULL)
-                       goto err_dup;
+                       return -ENOMEM;
        } else
                tbl = vs_vars;
        /* Initialize sysctl defaults */
@@ -3548,55 +3634,100 @@ int __net_init __ip_vs_control_init(struct net *net)
        tbl[idx++].data = &ipvs->sysctl_cache_bypass;
        tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
        tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
-       ipvs->sysctl_sync_threshold[0] = 3;
-       ipvs->sysctl_sync_threshold[1] = 50;
+       ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
+       ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
        tbl[idx].data = &ipvs->sysctl_sync_threshold;
        tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
        tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
 
 
        ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
-                                                 vs_vars);
-       if (ipvs->sysctl_hdr == NULL)
-               goto err_reg;
-       ip_vs_new_estimator(net, ipvs->tot_stats);
+                                                    tbl);
+       if (ipvs->sysctl_hdr == NULL) {
+               if (!net_eq(net, &init_net))
+                       kfree(tbl);
+               return -ENOMEM;
+       }
+       ip_vs_start_estimator(net, &ipvs->tot_stats);
        ipvs->sysctl_tbl = tbl;
        /* Schedule defense work */
        INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
        schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
-       return 0;
 
-err_reg:
-       if (!net_eq(net, &init_net))
-               kfree(tbl);
-err_dup:
-       free_percpu(ipvs->cpustats);
-err_alloc:
-       kfree(ipvs->tot_stats);
-       return -ENOMEM;
+       return 0;
 }
 
-static void __net_exit __ip_vs_control_cleanup(struct net *net)
+/*
+ * Per-netns sysctl teardown: cancels the defense work, unregisters the
+ * sysctl table and frees the table copy that the init side kmemdup()s for
+ * every netns other than init_net.
+ * Runs on the exit path (called from __ip_vs_control_cleanup()), so it is
+ * annotated __net_exit rather than __net_init.
+ */
+void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net)
 {
        struct netns_ipvs *ipvs = net_ipvs(net);
 
-       ip_vs_trash_cleanup(net);
-       ip_vs_kill_estimator(net, ipvs->tot_stats);
        cancel_delayed_work_sync(&ipvs->defense_work);
        cancel_work_sync(&ipvs->defense_work.work);
        unregister_net_sysctl_table(ipvs->sysctl_hdr);
+       /* init_net uses the static vs_vars table, which must not be freed */
+       if (!net_eq(net, &init_net))
+               kfree(ipvs->sysctl_tbl);
+}
+
+#else
+
+int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
+void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net) { }
+
+#endif
+
+static struct notifier_block ip_vs_dst_notifier = {
+       .notifier_call = ip_vs_dst_event, /* handles NETDEV_UNREGISTER */
+};
+
+/*
+ * Per-netns control initialisation: sets up the rs_table lists, the dest
+ * trash list and the ftp/null service counters, allocates the per-cpu
+ * part of the total stats, creates the three proc entries and finally
+ * runs the sysctl setup.
+ * Returns 0 on success or -ENOMEM.  On failure of the sysctl setup the
+ * proc entries and the per-cpu stats created here are released again, so
+ * no proc file is left behind that would read freed statistics.
+ */
+int __net_init __ip_vs_control_init(struct net *net)
+{
+       int idx;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+       /* Initialize rs_table */
+       for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+               INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+       INIT_LIST_HEAD(&ipvs->dest_trash);
+       atomic_set(&ipvs->ftpsvc_counter, 0);
+       atomic_set(&ipvs->nullsvc_counter, 0);
+
+       /* procfs stats */
+       ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!ipvs->tot_stats.cpustats) {
+               pr_err("%s(): alloc_percpu.\n", __func__);
+               return -ENOMEM;
+       }
+       spin_lock_init(&ipvs->tot_stats.lock);
+
+       proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+       proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+       proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+                            &ip_vs_stats_percpu_fops);
+
+       if (__ip_vs_control_init_sysctl(net))
+               goto err;
+
+       return 0;
+
+err:
+       /* undo in reverse order, the same way __ip_vs_control_cleanup() does */
+       proc_net_remove(net, "ip_vs_stats_percpu");
+       proc_net_remove(net, "ip_vs_stats");
+       proc_net_remove(net, "ip_vs");
+       free_percpu(ipvs->tot_stats.cpustats);
+       return -ENOMEM;
+}
+
+void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ip_vs_trash_cleanup(net);
+       ip_vs_stop_estimator(net, &ipvs->tot_stats);
+       __ip_vs_control_cleanup_sysctl(net);
        proc_net_remove(net, "ip_vs_stats_percpu");
        proc_net_remove(net, "ip_vs_stats");
        proc_net_remove(net, "ip_vs");
-       free_percpu(ipvs->cpustats);
-       kfree(ipvs->tot_stats);
+       free_percpu(ipvs->tot_stats.cpustats);
 }
 
-static struct pernet_operations ipvs_control_ops = {
-       .init = __ip_vs_control_init,
-       .exit = __ip_vs_control_cleanup,
-};
-
 int __init ip_vs_control_init(void)
 {
        int idx;
@@ -3610,33 +3741,32 @@ int __init ip_vs_control_init(void)
                INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
        }
 
-       ret = register_pernet_subsys(&ipvs_control_ops);
-       if (ret) {
-               pr_err("cannot register namespace.\n");
-               goto err;
-       }
-
        smp_wmb();      /* Do we really need it now ? */
 
        ret = nf_register_sockopt(&ip_vs_sockopts);
        if (ret) {
                pr_err("cannot register sockopt.\n");
-               goto err_net;
+               goto err_sock;
        }
 
        ret = ip_vs_genl_register();
        if (ret) {
                pr_err("cannot register Generic Netlink interface.\n");
-               nf_unregister_sockopt(&ip_vs_sockopts);
-               goto err_net;
+               goto err_genl;
        }
 
+       ret = register_netdevice_notifier(&ip_vs_dst_notifier);
+       if (ret < 0)
+               goto err_notf;
+
        LeaveFunction(2);
        return 0;
 
-err_net:
-       unregister_pernet_subsys(&ipvs_control_ops);
-err:
+err_notf:
+       ip_vs_genl_unregister();
+err_genl:
+       nf_unregister_sockopt(&ip_vs_sockopts);
+err_sock:
        return ret;
 }
 
@@ -3644,7 +3774,6 @@ err:
 void ip_vs_control_cleanup(void)
 {
        EnterFunction(2);
-       unregister_pernet_subsys(&ipvs_control_ops);
        ip_vs_genl_unregister();
        nf_unregister_sockopt(&ip_vs_sockopts);
        LeaveFunction(2);