slub: reduce overhead of slub_debug
Marcin Slusarz [Sun, 26 Jun 2011 19:39:18 +0000 (21:39 +0200)]
slub checks for poison one byte by one, which is highly inefficient
and shows up frequently as a highest cpu-eater in perf top.

Joining reads gives nice speedup:

(Compiling some project with different options)
                                 make -j12    make clean
slub_debug disabled:             1m 27s       1.2 s
slub_debug enabled:              1m 46s       7.6 s
slub_debug enabled + this patch: 1m 33s       3.2 s

check_bytes still shows up high, but not always at the top.

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: linux-mm@kvack.org
Signed-off-by: Pekka Enberg <penberg@kernel.org>

mm/slub.c

index 0e4f4f8..e3403b3 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -589,10 +589,10 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
                memset(p + s->objsize, val, s->inuse - s->objsize);
 }
 
-static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
+static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes)
 {
        while (bytes) {
-               if (*start != (u8)value)
+               if (*start != value)
                        return start;
                start++;
                bytes--;
@@ -600,6 +600,38 @@ static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
        return NULL;
 }
 
+static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes)
+{
+       u64 value64;
+       unsigned int words, prefix;
+
+       if (bytes <= 16)
+               return check_bytes8(start, value, bytes);
+
+       value64 = value | value << 8 | value << 16 | value << 24;
+       value64 = value64 | value64 << 32;
+       prefix = 8 - ((unsigned long)start) % 8;
+
+       if (prefix) {
+               u8 *r = check_bytes8(start, value, prefix);
+               if (r)
+                       return r;
+               start += prefix;
+               bytes -= prefix;
+       }
+
+       words = bytes / 8;
+
+       while (words) {
+               if (*(u64 *)start != value64)
+                       return check_bytes8(start, value, 8);
+               start += 8;
+               words--;
+       }
+
+       return check_bytes8(start, value, bytes % 8);
+}
+
 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
                                                void *from, void *to)
 {