ipvs: convert lblc scheduler to rcu
[linux-3.10.git] / net / netfilter / x_tables.c
index 69c5628..686c771 100644
@@ -12,8 +12,9 @@
  * published by the Free Software Foundation.
  *
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/socket.h>
 #include <linux/net.h>
 #include <linux/proc_fs.h>
@@ -22,6 +23,8 @@
 #include <linux/vmalloc.h>
 #include <linux/mutex.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/audit.h>
 #include <net/net_namespace.h>
 
 #include <linux/netfilter/x_tables.h>
@@ -37,9 +40,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
 
 struct compat_delta {
-       struct compat_delta *next;
-       unsigned int offset;
-       short delta;
+       unsigned int offset; /* offset in kernel */
+       int delta; /* delta in 32bit user land */
 };
 
 struct xt_af {
@@ -48,18 +50,14 @@ struct xt_af {
        struct list_head target;
 #ifdef CONFIG_COMPAT
        struct mutex compat_mutex;
-       struct compat_delta *compat_offsets;
+       struct compat_delta *compat_tab;
+       unsigned int number; /* number of slots in compat_tab[] */
+       unsigned int cur; /* number of used slots in compat_tab[] */
 #endif
 };
 
 static struct xt_af *xt;
 
-#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
 static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
        [NFPROTO_UNSPEC] = "x",
        [NFPROTO_IPV4]   = "ip",
@@ -68,6 +66,9 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
        [NFPROTO_IPV6]   = "ip6",
 };
 
+/* Allow this many total (re)entries. */
+static const unsigned int xt_jumpstack_multiplier = 2;
+
 /* Registration hooks for targets. */
 int
 xt_register_target(struct xt_target *target)
@@ -118,10 +119,8 @@ EXPORT_SYMBOL(xt_register_targets);
 void
 xt_unregister_targets(struct xt_target *target, unsigned int n)
 {
-       unsigned int i;
-
-       for (i = 0; i < n; i++)
-               xt_unregister_target(&target[i]);
+       while (n-- > 0)
+               xt_unregister_target(&target[n]);
 }
 EXPORT_SYMBOL(xt_unregister_targets);
 
@@ -176,10 +175,8 @@ EXPORT_SYMBOL(xt_register_matches);
 void
 xt_unregister_matches(struct xt_match *match, unsigned int n)
 {
-       unsigned int i;
-
-       for (i = 0; i < n; i++)
-               xt_unregister_match(&match[i]);
+       while (n-- > 0)
+               xt_unregister_match(&match[n]);
 }
 EXPORT_SYMBOL(xt_unregister_matches);
 
@@ -187,14 +184,14 @@ EXPORT_SYMBOL(xt_unregister_matches);
 /*
  * These are weird, but module loading must not be done with mutex
  * held (since they will register), and we have to have a single
- * function to use try_then_request_module().
+ * function that can be called again after request_module().
  */
 
 /* Find match, grabs ref.  Returns ERR_PTR() on error. */
 struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
 {
        struct xt_match *m;
-       int err = 0;
+       int err = -ENOENT;
 
        if (mutex_lock_interruptible(&xt[af].mutex) != 0)
                return ERR_PTR(-EINTR);
@@ -220,11 +217,26 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
 }
 EXPORT_SYMBOL(xt_find_match);
 
+struct xt_match *
+xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
+{
+       struct xt_match *found = xt_find_match(nfproto, name, revision);
+
+       if (!IS_ERR(found))
+               return found;
+
+       /* Lookup failed: ask for the module to be loaded, then retry once. */
+       request_module("%st_%s", xt_prefix[nfproto], name);
+       return xt_find_match(nfproto, name, revision);
+}
+EXPORT_SYMBOL_GPL(xt_request_find_match);
+
+
 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
 struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
 {
        struct xt_target *t;
-       int err = 0;
+       int err = -ENOENT;
 
        if (mutex_lock_interruptible(&xt[af].mutex) != 0)
                return ERR_PTR(-EINTR);
@@ -254,10 +266,12 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
 {
        struct xt_target *target;
 
-       target = try_then_request_module(xt_find_target(af, name, revision),
-                                        "%st_%s", xt_prefix[af], name);
-       if (IS_ERR(target) || !target)
-               return NULL;
+       target = xt_find_target(af, name, revision);
+       if (IS_ERR(target)) {
+               request_module("%st_%s", xt_prefix[af], name);
+               target = xt_find_target(af, name, revision);
+       }
+
        return target;
 }
 EXPORT_SYMBOL_GPL(xt_request_find_target);
@@ -331,19 +345,27 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
 }
 EXPORT_SYMBOL_GPL(xt_find_revision);
 
-static char *textify_hooks(char *buf, size_t size, unsigned int mask)
+static char *
+textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto)
 {
-       static const char *const names[] = {
+       static const char *const inetbr_names[] = {
                "PREROUTING", "INPUT", "FORWARD",
                "OUTPUT", "POSTROUTING", "BROUTING",
        };
-       unsigned int i;
+       static const char *const arp_names[] = {
+               "INPUT", "FORWARD", "OUTPUT",
+       };
+       const char *const *names;
+       unsigned int i, max;
        char *p = buf;
        bool np = false;
        int res;
 
+       names = (nfproto == NFPROTO_ARP) ? arp_names : inetbr_names;
+       max   = (nfproto == NFPROTO_ARP) ? ARRAY_SIZE(arp_names) :
+                                          ARRAY_SIZE(inetbr_names);
        *p = '\0';
-       for (i = 0; i < ARRAY_SIZE(names); ++i) {
+       for (i = 0; i < max; ++i) {
                if (!(mask & (1 << i)))
                        continue;
                res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]);
@@ -360,6 +382,8 @@ static char *textify_hooks(char *buf, size_t size, unsigned int mask)
 int xt_check_match(struct xt_mtchk_param *par,
                   unsigned int size, u_int8_t proto, bool inv_proto)
 {
+       int ret;
+
        if (XT_ALIGN(par->match->matchsize) != size &&
            par->match->matchsize != -1) {
                /*
@@ -386,8 +410,10 @@ int xt_check_match(struct xt_mtchk_param *par,
                pr_err("%s_tables: %s match: used from hooks %s, but only "
                       "valid from %s\n",
                       xt_prefix[par->family], par->match->name,
-                      textify_hooks(used, sizeof(used), par->hook_mask),
-                      textify_hooks(allow, sizeof(allow), par->match->hooks));
+                      textify_hooks(used, sizeof(used), par->hook_mask,
+                                    par->family),
+                      textify_hooks(allow, sizeof(allow), par->match->hooks,
+                                    par->family));
                return -EINVAL;
        }
        if (par->match->proto && (par->match->proto != proto || inv_proto)) {
@@ -396,61 +422,80 @@ int xt_check_match(struct xt_mtchk_param *par,
                       par->match->proto);
                return -EINVAL;
        }
-       if (par->match->checkentry != NULL && !par->match->checkentry(par))
-               return -EINVAL;
+       if (par->match->checkentry != NULL) {
+               ret = par->match->checkentry(par);
+               if (ret < 0)
+                       return ret;
+               else if (ret > 0)
+                       /* Flag up potential errors. */
+                       return -EIO;
+       }
        return 0;
 }
 EXPORT_SYMBOL_GPL(xt_check_match);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta)
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
 {
-       struct compat_delta *tmp;
-
-       tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
-       if (!tmp)
-               return -ENOMEM;
+       struct xt_af *xp = &xt[af];
+
+       if (!xp->compat_tab) {
+               if (!xp->number)
+                       return -EINVAL;
+               xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
+               if (!xp->compat_tab)
+                       return -ENOMEM;
+               xp->cur = 0;
+       }
 
-       tmp->offset = offset;
-       tmp->delta = delta;
+       if (xp->cur >= xp->number)
+               return -EINVAL;
 
-       if (xt[af].compat_offsets) {
-               tmp->next = xt[af].compat_offsets->next;
-               xt[af].compat_offsets->next = tmp;
-       } else {
-               xt[af].compat_offsets = tmp;
-               tmp->next = NULL;
-       }
+       if (xp->cur)
+               delta += xp->compat_tab[xp->cur - 1].delta;
+       xp->compat_tab[xp->cur].offset = offset;
+       xp->compat_tab[xp->cur].delta = delta;
+       xp->cur++;
        return 0;
 }
 EXPORT_SYMBOL_GPL(xt_compat_add_offset);
 
 void xt_compat_flush_offsets(u_int8_t af)
 {
-       struct compat_delta *tmp, *next;
-
-       if (xt[af].compat_offsets) {
-               for (tmp = xt[af].compat_offsets; tmp; tmp = next) {
-                       next = tmp->next;
-                       kfree(tmp);
-               }
-               xt[af].compat_offsets = NULL;
+       if (xt[af].compat_tab) {
+               vfree(xt[af].compat_tab);
+               xt[af].compat_tab = NULL;
+               xt[af].number = 0;
+               xt[af].cur = 0;
        }
 }
 EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
 
-short xt_compat_calc_jump(u_int8_t af, unsigned int offset)
+int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
 {
-       struct compat_delta *tmp;
-       short delta;
-
-       for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
-               if (tmp->offset < offset)
-                       delta += tmp->delta;
-       return delta;
+       struct compat_delta *tmp = xt[af].compat_tab;
+       int mid, left = 0, right = xt[af].cur - 1;
+
+       while (left <= right) {
+               mid = (left + right) >> 1;
+               if (offset > tmp[mid].offset)
+                       left = mid + 1;
+               else if (offset < tmp[mid].offset)
+                       right = mid - 1;
+               else
+                       return mid ? tmp[mid - 1].delta : 0;
+       }
+       return left ? tmp[left - 1].delta : 0;
 }
 EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
 
+void xt_compat_init_offsets(u_int8_t af, unsigned int number)
+{
+       xt[af].number = number; /* slots xt_compat_add_offset() may use */
+       xt[af].cur = 0; /* no slot used yet */
+}
+EXPORT_SYMBOL(xt_compat_init_offsets);
+
 int xt_compat_match_offset(const struct xt_match *match)
 {
        u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -517,6 +562,8 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
 int xt_check_target(struct xt_tgchk_param *par,
                    unsigned int size, u_int8_t proto, bool inv_proto)
 {
+       int ret;
+
        if (XT_ALIGN(par->target->targetsize) != size) {
                pr_err("%s_tables: %s.%u target: invalid size "
                       "%u (kernel) != (user) %u\n",
@@ -538,8 +585,10 @@ int xt_check_target(struct xt_tgchk_param *par,
                pr_err("%s_tables: %s target: used from hooks %s, but only "
                       "usable from %s\n",
                       xt_prefix[par->family], par->target->name,
-                      textify_hooks(used, sizeof(used), par->hook_mask),
-                      textify_hooks(allow, sizeof(allow), par->target->hooks));
+                      textify_hooks(used, sizeof(used), par->hook_mask,
+                                    par->family),
+                      textify_hooks(allow, sizeof(allow), par->target->hooks,
+                                    par->family));
                return -EINVAL;
        }
        if (par->target->proto && (par->target->proto != proto || inv_proto)) {
@@ -548,8 +597,14 @@ int xt_check_target(struct xt_tgchk_param *par,
                       par->target->proto);
                return -EINVAL;
        }
-       if (par->target->checkentry != NULL && !par->target->checkentry(par))
-               return -EINVAL;
+       if (par->target->checkentry != NULL) {
+               ret = par->target->checkentry(par);
+               if (ret < 0)
+                       return ret;
+               else if (ret > 0)
+                       /* Flag up potential errors. */
+                       return -EIO;
+       }
        return 0;
 }
 EXPORT_SYMBOL_GPL(xt_check_target);
@@ -661,6 +716,24 @@ void xt_free_table_info(struct xt_table_info *info)
                else
                        vfree(info->entries[cpu]);
        }
+
+       if (info->jumpstack != NULL) {
+               if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
+                       for_each_possible_cpu(cpu)
+                               vfree(info->jumpstack[cpu]);
+               } else {
+                       for_each_possible_cpu(cpu)
+                               kfree(info->jumpstack[cpu]);
+               }
+       }
+
+       if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
+               vfree(info->jumpstack);
+       else
+               kfree(info->jumpstack);
+
+       free_percpu(info->stackptr);
+
        kfree(info);
 }
 EXPORT_SYMBOL(xt_free_table_info);
@@ -702,9 +775,46 @@ void xt_compat_unlock(u_int8_t af)
 EXPORT_SYMBOL_GPL(xt_compat_unlock);
 #endif
 
-DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
-EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
+DEFINE_PER_CPU(seqcount_t, xt_recseq);
+EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
+
+/* Allocate i->stackptr and the per-cpu i->jumpstack[] arrays.
+ * Returns 0 or -ENOMEM; on failure nothing is freed here. */
+static int xt_jumpstack_alloc(struct xt_table_info *i)
+{
+       unsigned int bytes;
+       int cpu;
+
+       i->stackptr = alloc_percpu(unsigned int);
+       if (!i->stackptr)
+               return -ENOMEM;
+
+       /* Zeroed table holding one jumpstack pointer per cpu id. */
+       bytes = sizeof(void **) * nr_cpu_ids;
+       i->jumpstack = (bytes > PAGE_SIZE) ? vzalloc(bytes) :
+                                            kzalloc(bytes, GFP_KERNEL);
+       if (!i->jumpstack)
+               return -ENOMEM;
 
+       i->stacksize *= xt_jumpstack_multiplier;
+       bytes = sizeof(void *) * i->stacksize;
+       for_each_possible_cpu(cpu) {
+               int node = cpu_to_node(cpu);
+
+               i->jumpstack[cpu] = (bytes > PAGE_SIZE) ?
+                       vmalloc_node(bytes, node) :
+                       kmalloc_node(bytes, GFP_KERNEL, node);
+               /*
+                * Nothing is released on failure: the callers
+                * (xt_replace_table -> __do_replace -> do_replace)
+                * end up in xt_free_table_info, which frees it.
+                */
+               if (!i->jumpstack[cpu])
+                       return -ENOMEM;
+       }
+
+       return 0;
+}
 
 struct xt_table_info *
 xt_replace_table(struct xt_table *table,
@@ -713,6 +823,13 @@ xt_replace_table(struct xt_table *table,
              int *error)
 {
        struct xt_table_info *private;
+       int ret;
+
+       ret = xt_jumpstack_alloc(newinfo);
+       if (ret < 0) {
+               *error = ret;
+               return NULL;
+       }
 
        /* Do the substitution. */
        local_bh_disable();
@@ -720,7 +837,7 @@ xt_replace_table(struct xt_table *table,
 
        /* Check inside lock: is the old number correct? */
        if (num_counters != private->number) {
-               duprintf("num_counters != table->private->number (%u/%u)\n",
+               pr_debug("num_counters != table->private->number (%u/%u)\n",
                         num_counters, private->number);
                local_bh_enable();
                *error = -EAGAIN;
@@ -738,6 +855,21 @@ xt_replace_table(struct xt_table *table,
         */
        local_bh_enable();
 
+#ifdef CONFIG_AUDIT
+       if (audit_enabled) {
+               struct audit_buffer *ab;
+
+               ab = audit_log_start(current->audit_context, GFP_KERNEL,
+                                    AUDIT_NETFILTER_CFG);
+               if (ab) {
+                       audit_log_format(ab, "table=%s family=%u entries=%u",
+                                        table->name, table->af,
+                                        private->number);
+                       audit_log_end(ab);
+               }
+       }
+#endif
+
        return private;
 }
 EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -777,7 +909,7 @@ struct xt_table *xt_register_table(struct net *net,
                goto unlock;
 
        private = table->private;
-       duprintf("table->private->number = %u\n", private->number);
+       pr_debug("table->private->number = %u\n", private->number);
 
        /* save number of initial entries */
        private->initial_entries = private->number;
@@ -1191,12 +1323,12 @@ int xt_proto_init(struct net *net, u_int8_t af)
 out_remove_matches:
        strlcpy(buf, xt_prefix[af], sizeof(buf));
        strlcat(buf, FORMAT_MATCHES, sizeof(buf));
-       proc_net_remove(net, buf);
+       remove_proc_entry(buf, net->proc_net);
 
 out_remove_tables:
        strlcpy(buf, xt_prefix[af], sizeof(buf));
        strlcat(buf, FORMAT_TABLES, sizeof(buf));
-       proc_net_remove(net, buf);
+       remove_proc_entry(buf, net->proc_net);
 out:
        return -1;
 #endif
@@ -1210,15 +1342,15 @@ void xt_proto_fini(struct net *net, u_int8_t af)
 
        strlcpy(buf, xt_prefix[af], sizeof(buf));
        strlcat(buf, FORMAT_TABLES, sizeof(buf));
-       proc_net_remove(net, buf);
+       remove_proc_entry(buf, net->proc_net);
 
        strlcpy(buf, xt_prefix[af], sizeof(buf));
        strlcat(buf, FORMAT_TARGETS, sizeof(buf));
-       proc_net_remove(net, buf);
+       remove_proc_entry(buf, net->proc_net);
 
        strlcpy(buf, xt_prefix[af], sizeof(buf));
        strlcat(buf, FORMAT_MATCHES, sizeof(buf));
-       proc_net_remove(net, buf);
+       remove_proc_entry(buf, net->proc_net);
 #endif /*CONFIG_PROC_FS*/
 }
 EXPORT_SYMBOL_GPL(xt_proto_fini);
@@ -1242,9 +1374,7 @@ static int __init xt_init(void)
        int rv;
 
        for_each_possible_cpu(i) {
-               struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
-               spin_lock_init(&lock->lock);
-               lock->readers = 0;
+               seqcount_init(&per_cpu(xt_recseq, i));
        }
 
        xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
@@ -1255,7 +1385,7 @@ static int __init xt_init(void)
                mutex_init(&xt[i].mutex);
 #ifdef CONFIG_COMPAT
                mutex_init(&xt[i].compat_mutex);
-               xt[i].compat_offsets = NULL;
+               xt[i].compat_tab = NULL;
 #endif
                INIT_LIST_HEAD(&xt[i].target);
                INIT_LIST_HEAD(&xt[i].match);