net/netfilter/ipvs/ip_vs_ctl.c
index c93d806..37890f2 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -69,25 +69,30 @@ int ip_vs_get_debug_level(void)
 }
 #endif
 
+
+/* Prototypes */
+static void __ip_vs_del_service(struct ip_vs_service *svc);
+
+
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
 static int __ip_vs_addr_is_local_v6(struct net *net,
                                    const struct in6_addr *addr)
 {
        struct rt6_info *rt;
-       struct flowi fl = {
-               .oif = 0,
-               .fl6_dst = *addr,
-               .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
+       struct flowi6 fl6 = {
+               .daddr = *addr,
        };
 
-       rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
+       rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
        if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
-                       return 1;
+               return 1;
 
        return 0;
 }
 #endif
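
The hunk above follows the flowi -> flowi6 split: IPv6 paths now fill a struct flowi6 directly instead of the old union-style struct flowi, and the designated initializer zeroes every unmentioned field (which is why the explicit all-zero .fl6_src went away). A minimal sketch of the same local-address check, assuming the ip6_route_output() behavior of this kernel generation (it returns a dst even on failure); the dst_release() is illustrative and not part of the hunk above, which never drops its reference:

#include <linux/netdevice.h>
#include <net/ip6_route.h>

/* Sketch only: does @addr route to a loopback device in @net? */
static bool demo_addr_is_local_v6(struct net *net,
				  const struct in6_addr *addr)
{
	struct flowi6 fl6 = {
		.daddr = *addr,		/* everything else zeroed */
	};
	struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
	bool local = dst->dev && (dst->dev->flags & IFF_LOOPBACK);

	dst_release(dst);		/* drop the lookup's reference */
	return local;
}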
+
+#ifdef CONFIG_SYSCTL
 /*
  *     update_defense_level is called from keventd and from sysctl,
  *     so it needs to protect itself from softirqs
@@ -229,6 +234,7 @@ static void defense_work_handler(struct work_struct *work)
                ip_vs_random_dropentry(ipvs->net);
        schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 }
+#endif
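
update_defense_level() and its worker only act on sysctl-tunable defense settings, so the whole block, including the self-re-arming delayed work, can be compiled out when CONFIG_SYSCTL is off. The re-arming pattern, sketched with hypothetical names:

#include <linux/workqueue.h>

static void demo_defense_handler(struct work_struct *work);
static DECLARE_DELAYED_WORK(demo_defense_work, demo_defense_handler);

static void demo_defense_handler(struct work_struct *work)
{
	/* ... periodic maintenance under the relevant locks ... */
	schedule_delayed_work(&demo_defense_work, HZ);	/* re-arm in 1s */
}

/* start once at init:  schedule_delayed_work(&demo_defense_work, HZ);
 * stop at cleanup:     cancel_delayed_work_sync(&demo_defense_work); */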
 
 int
 ip_vs_use_count_inc(void)
@@ -797,12 +803,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
        dest->u_threshold = udest->u_threshold;
        dest->l_threshold = udest->l_threshold;
 
-       spin_lock(&dest->dst_lock);
+       spin_lock_bh(&dest->dst_lock);
        ip_vs_dst_reset(dest);
-       spin_unlock(&dest->dst_lock);
+       spin_unlock_bh(&dest->dst_lock);
 
        if (add)
-               ip_vs_new_estimator(svc->net, &dest->stats);
+               ip_vs_start_estimator(svc->net, &dest->stats);
 
        write_lock_bh(&__ip_vs_svc_lock);
 
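The switch to the _bh lock variants matters because dest->dst_lock is also taken from packet-processing (softirq) context: a plain spin_lock here in process context could deadlock if a softirq on the same CPU tried to take the lock while it was held. Schematically:

	/* process context: keep softirqs off while holding the lock */
	spin_lock_bh(&dest->dst_lock);	/* disables BHs on this CPU */
	ip_vs_dst_reset(dest);		/* now safe vs. softirq users */
	spin_unlock_bh(&dest->dst_lock);

	/* softirq (packet) context may use plain spin_lock(&dest->dst_lock),
	 * since softirqs do not nest on one CPU. */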
@@ -1008,7 +1014,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
 {
        struct netns_ipvs *ipvs = net_ipvs(net);
 
-       ip_vs_kill_estimator(net, &dest->stats);
+       ip_vs_stop_estimator(net, &dest->stats);
 
        /*
         *  Remove it from the d-linked list with the real services.
@@ -1201,7 +1207,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
        else if (svc->port == 0)
                atomic_inc(&ipvs->nullsvc_counter);
 
-       ip_vs_new_estimator(net, &svc->stats);
+       ip_vs_start_estimator(net, &svc->stats);
 
        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
@@ -1213,6 +1219,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
        write_unlock_bh(&__ip_vs_svc_lock);
 
        *svc_p = svc;
+       /* Now there is a service - full throttle */
+       ipvs->enable = 1;
        return 0;
 
 
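Setting ipvs->enable only once the first service exists lets the per-netns packet hooks stay cheap in namespaces that never configure IPVS; a hook can bail out before doing any connection-table work. A hedged sketch (the actual check lives in the hooks in ip_vs_core.c, not in this file):

	/* early in an IPVS netfilter hook: */
	if (!net_ipvs(net)->enable)
		return NF_ACCEPT;	/* IPVS idle in this netns */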
@@ -1353,7 +1361,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
        if (svc->af == AF_INET)
                ipvs->num_services--;
 
-       ip_vs_kill_estimator(svc->net, &svc->stats);
+       ip_vs_stop_estimator(svc->net, &svc->stats);
 
        /* Unbind scheduler */
        old_sched = svc->scheduler;
@@ -1471,6 +1479,84 @@ static int ip_vs_flush(struct net *net)
        return 0;
 }
 
+/*
+ *     Delete all services for the given netns from the service table.
+ *     Called by __ip_vs_cleanup()
+ */
+void __ip_vs_service_cleanup(struct net *net)
+{
+       EnterFunction(2);
+       /* Check for "full" addressed entries */
+       mutex_lock(&__ip_vs_mutex);
+       ip_vs_flush(net);
+       mutex_unlock(&__ip_vs_mutex);
+       LeaveFunction(2);
+}
+
+/*
+ * Release the dst held by dst_cache
+ */
+static inline void
+__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
+{
+       spin_lock_bh(&dest->dst_lock);
+       if (dest->dst_cache && dest->dst_cache->dev == dev) {
+               IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u, dest->refcnt=%d\n",
+                             dev->name,
+                             IP_VS_DBG_ADDR(dest->af, &dest->addr),
+                             ntohs(dest->port),
+                             atomic_read(&dest->refcnt));
+               ip_vs_dst_reset(dest);
+       }
+       spin_unlock_bh(&dest->dst_lock);
+}
+
+/*
+ * Netdev event receiver
+ * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
+ * a device that is being unregistered, it must be released.
+ */
+static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
+                           void *ptr)
+{
+       struct net_device *dev = ptr;
+       struct net *net = dev_net(dev);
+       struct ip_vs_service *svc;
+       struct ip_vs_dest *dest;
+       unsigned int idx;
+
+       if (event != NETDEV_UNREGISTER)
+               return NOTIFY_DONE;
+       IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
+       EnterFunction(2);
+       mutex_lock(&__ip_vs_mutex);
+       for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+               list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+                       if (net_eq(svc->net, net)) {
+                               list_for_each_entry(dest, &svc->destinations,
+                                                   n_list) {
+                                       __ip_vs_dev_reset(dest, dev);
+                               }
+                       }
+               }
+
+               list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+                       if (net_eq(svc->net, net)) {
+                               list_for_each_entry(dest, &svc->destinations,
+                                                   n_list) {
+                                       __ip_vs_dev_reset(dest, dev);
+                               }
+                       }
+
+               }
+       }
+
+       list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) {
+               __ip_vs_dev_reset(dest, dev);
+       }
+       mutex_unlock(&__ip_vs_mutex);
+       LeaveFunction(2);
+       return NOTIFY_DONE;
+}
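
ip_vs_dst_event() uses the standard netdevice notifier interface; at this point in kernel history the notifier's ptr argument is the struct net_device itself (the netdev_notifier_info wrapper came much later). A self-contained sketch of the pattern, with hypothetical names:

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int demo_dst_event(struct notifier_block *this, unsigned long event,
			  void *ptr)
{
	struct net_device *dev = ptr;	/* pre-netdev_notifier_info ABI */

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;	/* only unregister is interesting */

	/* walk private caches, drop references to routes via @dev */
	return NOTIFY_DONE;
}

static struct notifier_block demo_dst_notifier = {
	.notifier_call = demo_dst_event,
};

/* paired at module init/exit:
 *	register_netdevice_notifier(&demo_dst_notifier);
 *	unregister_netdevice_notifier(&demo_dst_notifier);
 */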
 
 /*
  *     Zero counters in a service or all services
@@ -1511,7 +1597,7 @@ static int ip_vs_zero_all(struct net *net)
        return 0;
 }
 
-
+#ifdef CONFIG_SYSCTL
 static int
 proc_do_defense_mode(ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1533,7 +1619,6 @@ proc_do_defense_mode(ctl_table *table, int write,
        return rc;
 }
 
-
 static int
 proc_do_sync_threshold(ctl_table *table, int write,
                       void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1767,6 +1852,7 @@ const struct ctl_path net_vs_ctl_path[] = {
        { }
 };
 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
+#endif
 
 #ifdef CONFIG_PROC_FS
 
@@ -1980,7 +2066,7 @@ static const struct file_operations ip_vs_info_fops = {
        .open    = ip_vs_info_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
-       .release = seq_release_private,
+       .release = seq_release_net,
 };
 
 #endif
@@ -2023,7 +2109,7 @@ static const struct file_operations ip_vs_stats_fops = {
        .open = ip_vs_stats_seq_open,
        .read = seq_read,
        .llseek = seq_lseek,
-       .release = single_release,
+       .release = single_release_net,
 };
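
The *_release_net variants pair with the *_open_net helpers: opening a per-netns proc file takes a reference on the inode's struct net, and the matching release must drop it; plain seq_release()/single_release() would leak the namespace. A sketch of the pairing, with a hypothetical show routine:

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/seq_file_net.h>

static int demo_show(struct seq_file *seq, void *v)
{
	struct net *net = seq->private;	/* set up by single_open_net() */

	seq_printf(seq, "hello from netns %p\n", net);
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, demo_show);	/* grabs netns ref */
}

static const struct file_operations demo_fops = {
	.owner   = THIS_MODULE,
	.open    = demo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release_net,	/* drops the netns ref */
};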
 
 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
@@ -2092,7 +2178,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = {
        .open = ip_vs_stats_percpu_seq_open,
        .read = seq_read,
        .llseek = seq_lseek,
-       .release = single_release,
+       .release = single_release_net,
 };
 #endif
 
@@ -3119,7 +3205,7 @@ nla_put_failure:
 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
                                   struct netlink_callback *cb)
 {
-       struct net *net = skb_net(skb);
+       struct net *net = skb_sknet(skb);
        struct netns_ipvs *ipvs = net_ipvs(net);
 
        mutex_lock(&__ip_vs_mutex);
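The skb_net -> skb_sknet change is about where the netns lives: in a Generic Netlink dump callback the skb is the output buffer owned by the requesting socket, so the namespace must come from skb->sk; skb_net() derives it from skb->dev, which is only meaningful on the packet path. Roughly (both helpers are IPVS-internal, from ip_vs.h):

	/* packet path:  netns of the receiving device */
	net = skb_net(skb);		/* ~ dev_net(skb->dev) */

	/* netlink dump: netns of the requesting socket  */
	net = skb_sknet(skb);		/* ~ sock_net(skb->sk) */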
@@ -3511,7 +3597,8 @@ static void ip_vs_genl_unregister(void)
 /*
 * per netns init/exit functions.
  */
-int __net_init __ip_vs_control_init(struct net *net)
+#ifdef CONFIG_SYSCTL
+int __net_init __ip_vs_control_init_sysctl(struct net *net)
 {
        int idx;
        struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3521,33 +3608,11 @@ int __net_init __ip_vs_control_init(struct net *net)
        spin_lock_init(&ipvs->dropentry_lock);
        spin_lock_init(&ipvs->droppacket_lock);
        spin_lock_init(&ipvs->securetcp_lock);
-       ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
-
-       /* Initialize rs_table */
-       for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
-               INIT_LIST_HEAD(&ipvs->rs_table[idx]);
-
-       INIT_LIST_HEAD(&ipvs->dest_trash);
-       atomic_set(&ipvs->ftpsvc_counter, 0);
-       atomic_set(&ipvs->nullsvc_counter, 0);
-
-       /* procfs stats */
-       ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
-       if (!ipvs->tot_stats.cpustats) {
-               pr_err("%s() alloc_percpu failed\n", __func__);
-               goto err_alloc;
-       }
-       spin_lock_init(&ipvs->tot_stats.lock);
-
-       proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
-       proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
-       proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
-                            &ip_vs_stats_percpu_fops);
 
        if (!net_eq(net, &init_net)) {
                tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
                if (tbl == NULL)
-                       goto err_dup;
+                       return -ENOMEM;
        } else
                tbl = vs_vars;
        /* Initialize sysctl defaults */
@@ -3569,57 +3634,100 @@ int __net_init __ip_vs_control_init(struct net *net)
        tbl[idx++].data = &ipvs->sysctl_cache_bypass;
        tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
        tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
-       ipvs->sysctl_sync_threshold[0] = 3;
-       ipvs->sysctl_sync_threshold[1] = 50;
+       ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
+       ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
        tbl[idx].data = &ipvs->sysctl_sync_threshold;
        tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
        tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
 
 
-#ifdef CONFIG_SYSCTL
        ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
                                                     tbl);
        if (ipvs->sysctl_hdr == NULL) {
                if (!net_eq(net, &init_net))
                        kfree(tbl);
-               goto err_dup;
+               return -ENOMEM;
        }
-#endif
-       ip_vs_new_estimator(net, &ipvs->tot_stats);
+       ip_vs_start_estimator(net, &ipvs->tot_stats);
        ipvs->sysctl_tbl = tbl;
        /* Schedule defense work */
        INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
        schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
-       return 0;
 
-err_dup:
-       free_percpu(ipvs->tot_stats.cpustats);
-err_alloc:
-       return -ENOMEM;
+       return 0;
 }
 
-static void __net_exit __ip_vs_control_cleanup(struct net *net)
+void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net)
 {
        struct netns_ipvs *ipvs = net_ipvs(net);
 
-       ip_vs_trash_cleanup(net);
-       ip_vs_kill_estimator(net, &ipvs->tot_stats);
        cancel_delayed_work_sync(&ipvs->defense_work);
        cancel_work_sync(&ipvs->defense_work.work);
-#ifdef CONFIG_SYSCTL
        unregister_net_sysctl_table(ipvs->sysctl_hdr);
+}
+
+#else
+
+int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
+void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net) { }
+
 #endif
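
This is the usual compile-out idiom: one real implementation under the config option, trivial stubs otherwise, so callers like __ip_vs_control_init() below need no #ifdefs of their own. In its generic header form (hypothetical option and names):

#ifdef CONFIG_FOO
int foo_init(struct net *net);		/* real version elsewhere */
void foo_exit(struct net *net);
#else
static inline int foo_init(struct net *net) { return 0; }	/* no-op */
static inline void foo_exit(struct net *net) { }
#endif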
+
+static struct notifier_block ip_vs_dst_notifier = {
+       .notifier_call = ip_vs_dst_event,
+};
+
+int __net_init __ip_vs_control_init(struct net *net)
+{
+       int idx;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+       /* Initialize rs_table */
+       for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+               INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+       INIT_LIST_HEAD(&ipvs->dest_trash);
+       atomic_set(&ipvs->ftpsvc_counter, 0);
+       atomic_set(&ipvs->nullsvc_counter, 0);
+
+       /* procfs stats */
+       ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!ipvs->tot_stats.cpustats) {
+               pr_err("%s(): alloc_percpu failed\n", __func__);
+               return -ENOMEM;
+       }
+       spin_lock_init(&ipvs->tot_stats.lock);
+
+       proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+       proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+       proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+                            &ip_vs_stats_percpu_fops);
+
+       if (__ip_vs_control_init_sysctl(net))
+               goto err;
+
+       return 0;
+
+err:
+       free_percpu(ipvs->tot_stats.cpustats);
+       return -ENOMEM;
+}
+
+void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ip_vs_trash_cleanup(net);
+       ip_vs_stop_estimator(net, &ipvs->tot_stats);
+       __ip_vs_control_cleanup_sysctl(net);
        proc_net_remove(net, "ip_vs_stats_percpu");
        proc_net_remove(net, "ip_vs_stats");
        proc_net_remove(net, "ip_vs");
        free_percpu(ipvs->tot_stats.cpustats);
 }
 
-static struct pernet_operations ipvs_control_ops = {
-       .init = __ip_vs_control_init,
-       .exit = __ip_vs_control_cleanup,
-};
-
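
The pernet_operations registration leaves this file with the netns rework: a single pernet subsystem in the IPVS core now drives control, estimator and friends in a defined init/cleanup order. For reference, the general shape of what was removed (hypothetical names):

static int  __net_init demo_net_init(struct net *net);
static void __net_exit demo_net_exit(struct net *net);

static struct pernet_operations demo_net_ops = {
	.init = demo_net_init,	/* run for each namespace created */
	.exit = demo_net_exit,	/* run as each namespace dies */
};

/* register_pernet_subsys(&demo_net_ops) at module init also runs
 * .init for all already-existing namespaces;
 * unregister_pernet_subsys() likewise runs .exit everywhere. */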
 int __init ip_vs_control_init(void)
 {
        int idx;
@@ -3633,33 +3741,32 @@ int __init ip_vs_control_init(void)
                INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
        }
 
-       ret = register_pernet_subsys(&ipvs_control_ops);
-       if (ret) {
-               pr_err("cannot register namespace.\n");
-               goto err;
-       }
-
        smp_wmb();      /* Do we really need it now ? */
 
        ret = nf_register_sockopt(&ip_vs_sockopts);
        if (ret) {
                pr_err("cannot register sockopt.\n");
-               goto err_net;
+               goto err_sock;
        }
 
        ret = ip_vs_genl_register();
        if (ret) {
                pr_err("cannot register Generic Netlink interface.\n");
-               nf_unregister_sockopt(&ip_vs_sockopts);
-               goto err_net;
+               goto err_genl;
        }
 
+       ret = register_netdevice_notifier(&ip_vs_dst_notifier);
+       if (ret < 0)
+               goto err_notf;
+
        LeaveFunction(2);
        return 0;
 
-err_net:
-       unregister_pernet_subsys(&ipvs_control_ops);
-err:
+err_notf:
+       ip_vs_genl_unregister();
+err_genl:
+       nf_unregister_sockopt(&ip_vs_sockopts);
+err_sock:
        return ret;
 }
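
The reworked failure handling follows the common kernel unwind idiom: one label per completed registration, jumped to in reverse order, so each step is undone exactly once. Schematically, with hypothetical names:

	ret = register_a();
	if (ret)
		return ret;		/* nothing to unwind yet */
	ret = register_b();
	if (ret)
		goto err_b;
	ret = register_c();
	if (ret)
		goto err_c;
	return 0;

err_c:
	unregister_b();			/* undo in reverse order */
err_b:
	unregister_a();
	return ret;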
 
@@ -3667,7 +3774,6 @@ err:
 void ip_vs_control_cleanup(void)
 {
        EnterFunction(2);
-       unregister_pernet_subsys(&ipvs_control_ops);
        ip_vs_genl_unregister();
        nf_unregister_sockopt(&ip_vs_sockopts);
        LeaveFunction(2);