netfilter: xtables: stackptr should be percpu
Eric Dumazet [Mon, 31 May 2010 14:41:35 +0000 (16:41 +0200)]
Commit f3c5c1bfd4 (netfilter: xtables: make ip_tables reentrant)
introduced a performance regression: the stackptr array is shared by
all CPUs, which causes cache line ping-pong (16 CPUs share a single
64-byte cache line).

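Fix this by allocating stackptr with alloc_percpu(), so that each CPU
gets its own counter. alloc_percpu() returns zeroed memory, so the
explicit memset() is no longer needed.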

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>

include/linux/netfilter/x_tables.h
net/ipv4/netfilter/ip_tables.c
net/ipv6/netfilter/ip6_tables.c
net/netfilter/x_tables.c

index c00cc0c..24e5d01 100644 (file)
@@ -397,7 +397,7 @@ struct xt_table_info {
         * @stacksize jumps (number of user chains) can possibly be made.
         */
        unsigned int stacksize;
-       unsigned int *stackptr;
+       unsigned int __percpu *stackptr;
        void ***jumpstack;
        /* ipt_entry tables: one per CPU */
        /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
index 63958f3..4b6c5ca 100644 (file)
@@ -336,7 +336,7 @@ ipt_do_table(struct sk_buff *skb,
        cpu        = smp_processor_id();
        table_base = private->entries[cpu];
        jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
-       stackptr   = &private->stackptr[cpu];
+       stackptr   = per_cpu_ptr(private->stackptr, cpu);
        origptr    = *stackptr;
 
        e = get_entry(table_base, private->hook_entry[hook]);
index 6f517bd..9d2d68f 100644 (file)
@@ -363,7 +363,7 @@ ip6t_do_table(struct sk_buff *skb,
        cpu        = smp_processor_id();
        table_base = private->entries[cpu];
        jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
-       stackptr   = &private->stackptr[cpu];
+       stackptr   = per_cpu_ptr(private->stackptr, cpu);
        origptr    = *stackptr;
 
        e = get_entry(table_base, private->hook_entry[hook]);
index 47b1e79..e34622f 100644 (file)
@@ -699,10 +699,8 @@ void xt_free_table_info(struct xt_table_info *info)
                vfree(info->jumpstack);
        else
                kfree(info->jumpstack);
-       if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE)
-               vfree(info->stackptr);
-       else
-               kfree(info->stackptr);
+
+       free_percpu(info->stackptr);
 
        kfree(info);
 }
@@ -753,14 +751,9 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
        unsigned int size;
        int cpu;
 
-       size = sizeof(unsigned int) * nr_cpu_ids;
-       if (size > PAGE_SIZE)
-               i->stackptr = vmalloc(size);
-       else
-               i->stackptr = kmalloc(size, GFP_KERNEL);
+       i->stackptr = alloc_percpu(unsigned int);
        if (i->stackptr == NULL)
                return -ENOMEM;
-       memset(i->stackptr, 0, size);
 
        size = sizeof(void **) * nr_cpu_ids;
        if (size > PAGE_SIZE)