mm: sync vmalloc address space page tables in alloc_vm_area()
index 41a15c1..9f662d7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -28,6 +28,7 @@
 #include <linux/memory.h>
 #include <linux/math64.h>
 #include <linux/fault-inject.h>
+#include <linux/stacktrace.h>
 
 #include <trace/events/kmem.h>
 
@@ -194,8 +195,12 @@ static LIST_HEAD(slab_caches);
 /*
  * Tracking user of a slab.
  */
+#define TRACK_ADDRS_COUNT 16
 struct track {
        unsigned long addr;     /* Called from address */
+#ifdef CONFIG_STACKTRACE
+       unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
+#endif
        int cpu;                /* Was running on cpu */
        int pid;                /* Pid context */
        unsigned long when;     /* When did the operation occur */
@@ -354,6 +359,42 @@ static __always_inline void slab_unlock(struct page *page)
        __bit_spin_unlock(PG_locked, &page->flags);
 }
 
+/* Interrupts must be disabled (for the fallback code to work right) */
+static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
+               void *freelist_old, unsigned long counters_old,
+               void *freelist_new, unsigned long counters_new,
+               const char *n)
+{
+       VM_BUG_ON(!irqs_disabled());
+#ifdef CONFIG_CMPXCHG_DOUBLE
+       if (s->flags & __CMPXCHG_DOUBLE) {
+               if (cmpxchg_double(&page->freelist,
+                       freelist_old, counters_old,
+                       freelist_new, counters_new))
+                       return 1;
+       } else
+#endif
+       {
+               slab_lock(page);
+               if (page->freelist == freelist_old && page->counters == counters_old) {
+                       page->freelist = freelist_new;
+                       page->counters = counters_new;
+                       slab_unlock(page);
+                       return 1;
+               }
+               slab_unlock(page);
+       }
+
+       cpu_relax();
+       stat(s, CMPXCHG_DOUBLE_FAIL);
+
+#ifdef SLUB_DEBUG_CMPXCHG
+       printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
+#endif
+
+       return 0;
+}
+
 static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
                void *freelist_old, unsigned long counters_old,
                void *freelist_new, unsigned long counters_new,
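
The fallback branch above emulates a double-word compare-and-swap in software: take the per-page bit-spinlock, compare both words, and store the new pair only if both still match. A rough userspace sketch of the same pattern, with a pthread mutex standing in for slab_lock()/slab_unlock() and all names invented purely for illustration:

#include <pthread.h>
#include <stdbool.h>

struct two_words {
	void *freelist;
	unsigned long counters;
	pthread_mutex_t lock;	/* stands in for the per-page bit-spinlock */
};

/* Replace (freelist, counters) only if both still hold the expected values. */
static bool cmpxchg_double_fallback(struct two_words *w,
		void *old_f, unsigned long old_c,
		void *new_f, unsigned long new_c)
{
	bool ok = false;

	pthread_mutex_lock(&w->lock);
	if (w->freelist == old_f && w->counters == old_c) {
		w->freelist = new_f;
		w->counters = new_c;
		ok = true;
	}
	pthread_mutex_unlock(&w->lock);
	return ok;
}
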
@@ -368,14 +409,19 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
        } else
 #endif
        {
+               unsigned long flags;
+
+               local_irq_save(flags);
                slab_lock(page);
                if (page->freelist == freelist_old && page->counters == counters_old) {
                        page->freelist = freelist_new;
                        page->counters = counters_new;
                        slab_unlock(page);
+                       local_irq_restore(flags);
                        return 1;
                }
                slab_unlock(page);
+               local_irq_restore(flags);
        }
 
        cpu_relax();
@@ -470,6 +516,24 @@ static void set_track(struct kmem_cache *s, void *object,
        struct track *p = get_track(s, object, alloc);
 
        if (addr) {
+#ifdef CONFIG_STACKTRACE
+               struct stack_trace trace;
+               int i;
+
+               trace.nr_entries = 0;
+               trace.max_entries = TRACK_ADDRS_COUNT;
+               trace.entries = p->addrs;
+               trace.skip = 3;
+               save_stack_trace(&trace);
+
+               /* See rant in lockdep.c */
+               if (trace.nr_entries != 0 &&
+                   trace.entries[trace.nr_entries - 1] == ULONG_MAX)
+                       trace.nr_entries--;
+
+               for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
+                       p->addrs[i] = 0;
+#endif
                p->addr = addr;
                p->cpu = smp_processor_id();
                p->pid = current->pid;
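
set_track() above fills a fixed-size array of return addresses via save_stack_trace(), drops the trailing ULONG_MAX end-of-stack marker, and zeroes the unused slots so print_track() can stop at the first zero entry. A hypothetical userspace analogue of the same capture-and-pad pattern, using glibc's backtrace(3) (the struct and function names are made up):

#include <execinfo.h>
#include <string.h>

#define TRACK_ADDRS_COUNT 16

struct utrack {
	void *addrs[TRACK_ADDRS_COUNT];
};

static void utrack_capture(struct utrack *t)
{
	int n = backtrace(t->addrs, TRACK_ADDRS_COUNT);

	/* Zero-pad so a printer can stop at the first NULL entry. */
	if (n < TRACK_ADDRS_COUNT)
		memset(&t->addrs[n], 0,
		       (TRACK_ADDRS_COUNT - n) * sizeof(t->addrs[0]));
}
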
@@ -494,6 +558,16 @@ static void print_track(const char *s, struct track *t)
 
        printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
                s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
+#ifdef CONFIG_STACKTRACE
+       {
+               int i;
+               for (i = 0; i < TRACK_ADDRS_COUNT; i++)
+                       if (t->addrs[i])
+                               printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
+                       else
+                               break;
+       }
+#endif
 }
 
 static void print_tracking(struct kmem_cache *s, void *object)
@@ -607,10 +681,10 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
                memset(p + s->objsize, val, s->inuse - s->objsize);
 }
 
-static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
+static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes)
 {
        while (bytes) {
-               if (*start != (u8)value)
+               if (*start != value)
                        return start;
                start++;
                bytes--;
@@ -618,6 +692,38 @@ static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
        return NULL;
 }
 
+static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes)
+{
+       u64 value64;
+       unsigned int words, prefix;
+
+       if (bytes <= 16)
+               return check_bytes8(start, value, bytes);
+
+       value64 = value | value << 8 | value << 16 | value << 24;
+       value64 = (value64 & 0xffffffff) | value64 << 32;
+       prefix = 8 - ((unsigned long)start) % 8;
+
+       if (prefix) {
+               u8 *r = check_bytes8(start, value, prefix);
+               if (r)
+                       return r;
+               start += prefix;
+               bytes -= prefix;
+       }
+
+       words = bytes / 8;
+
+       while (words) {
+               if (*(u64 *)start != value64)
+                       return check_bytes8(start, value, 8);
+               start += 8;
+               words--;
+       }
+
+       return check_bytes8(start, value, bytes % 8);
+}
+
 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
                                                void *from, void *to)
 {
@@ -1471,7 +1577,7 @@ static inline int acquire_slab(struct kmem_cache *s,
                VM_BUG_ON(new.frozen);
                new.frozen = 1;
 
-       } while (!cmpxchg_double_slab(s, page,
+       } while (!__cmpxchg_double_slab(s, page,
                        freelist, counters,
                        NULL, new.counters,
                        "lock and freeze"));
@@ -1709,7 +1815,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
                        new.inuse--;
                        VM_BUG_ON(!new.frozen);
 
-               } while (!cmpxchg_double_slab(s, page,
+               } while (!__cmpxchg_double_slab(s, page,
                        prior, counters,
                        freelist, new.counters,
                        "drain percpu freelist"));
@@ -1748,7 +1854,7 @@ redo:
 
        new.frozen = 0;
 
-       if (!new.inuse && n->nr_partial < s->min_partial)
+       if (!new.inuse && n->nr_partial > s->min_partial)
                m = M_FREE;
        else if (new.freelist) {
                m = M_PARTIAL;
@@ -1798,7 +1904,7 @@ redo:
        }
 
        l = m;
-       if (!cmpxchg_double_slab(s, page,
+       if (!__cmpxchg_double_slab(s, page,
                                old.freelist, old.counters,
                                new.freelist, new.counters,
                                "unfreezing slab"))
@@ -1966,6 +2072,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
                goto new_slab;
 
        if (unlikely(!node_match(c, node))) {
+               stat(s, ALLOC_NODE_MISMATCH);
                deactivate_slab(s, c);
                goto new_slab;
        }
@@ -1976,22 +2083,36 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
                object = page->freelist;
                counters = page->counters;
                new.counters = counters;
-               new.inuse = page->objects;
                VM_BUG_ON(!new.frozen);
 
-       } while (!cmpxchg_double_slab(s, page,
+               /*
+                * If there is no object left then we use this loop to
+                * deactivate the slab which is simple since no objects
+                * are left in the slab and therefore we do not need to
+                * put the page back onto the partial list.
+                *
+                * If there are objects left then we retrieve them
+                * and use them to refill the per cpu queue.
+                */
+
+               new.inuse = page->objects;
+               new.frozen = object != NULL;
+
+       } while (!__cmpxchg_double_slab(s, page,
                        object, counters,
                        NULL, new.counters,
                        "__slab_alloc"));
 
-load_freelist:
-       VM_BUG_ON(!page->frozen);
-
-       if (unlikely(!object))
+       if (unlikely(!object)) {
+               c->page = NULL;
+               stat(s, DEACTIVATE_BYPASS);
                goto new_slab;
+       }
 
        stat(s, ALLOC_REFILL);
 
+load_freelist:
+       VM_BUG_ON(!page->frozen);
        c->freelist = get_freepointer(s, object);
        c->tid = next_tid(c->tid);
        local_irq_restore(flags);
@@ -2026,6 +2147,9 @@ new_slab:
                stat(s, ALLOC_SLAB);
                c->node = page_to_nid(page);
                c->page = page;
+
+               if (kmem_cache_debug(s))
+                       goto debug;
                goto load_freelist;
        }
        if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
@@ -2263,11 +2387,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 slab_empty:
        if (prior) {
                /*
-                * Slab still on the partial list.
+                * Slab on the partial list.
                 */
                remove_partial(n, page);
                stat(s, FREE_REMOVE_PARTIAL);
-       }
+       } else
+               /* Slab must be on the full list */
+               remove_full(s, page);
 
        spin_unlock_irqrestore(&n->list_lock, flags);
        stat(s, FREE_SLAB);
@@ -3120,6 +3246,42 @@ size_t ksize(const void *object)
 }
 EXPORT_SYMBOL(ksize);
 
+#ifdef CONFIG_SLUB_DEBUG
+bool verify_mem_not_deleted(const void *x)
+{
+       struct page *page;
+       void *object = (void *)x;
+       unsigned long flags;
+       bool rv;
+
+       if (unlikely(ZERO_OR_NULL_PTR(x)))
+               return false;
+
+       local_irq_save(flags);
+
+       page = virt_to_head_page(x);
+       if (unlikely(!PageSlab(page))) {
+               /* maybe it was from stack? */
+               rv = true;
+               goto out_unlock;
+       }
+
+       slab_lock(page);
+       if (on_freelist(page->slab, page, object)) {
+               object_err(page->slab, page, object, "Object is on free-list");
+               rv = false;
+       } else {
+               rv = true;
+       }
+       slab_unlock(page);
+
+out_unlock:
+       local_irq_restore(flags);
+       return rv;
+}
+EXPORT_SYMBOL(verify_mem_not_deleted);
+#endif
+
 void kfree(const void *x)
 {
        struct page *page;
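
verify_mem_not_deleted() gives CONFIG_SLUB_DEBUG builds a way for code that still holds a pointer to ask whether the object has already gone back onto its slab's freelist. A hypothetical caller might wrap it in an assertion along these lines (helper name and message are illustrative only):

#include <linux/bug.h>
#include <linux/slab.h>

/*
 * Hypothetical debugging helper: warn once if an object we believe to be
 * live is in fact sitting on its slab's freelist.
 */
static void check_still_allocated(const void *obj)
{
	if (!verify_mem_not_deleted(obj))
		WARN_ONCE(1, "object %p looks freed\n", obj);
}
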
@@ -4241,7 +4403,7 @@ static int any_slab_objects(struct kmem_cache *s)
 #endif
 
 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
-#define to_slab(n) container_of(n, struct kmem_cache, kobj);
+#define to_slab(n) container_of(n, struct kmem_cache, kobj)
 
 struct slab_attribute {
        struct attribute attr;
@@ -4671,6 +4833,7 @@ STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
 STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
 STAT_ATTR(ALLOC_SLAB, alloc_slab);
 STAT_ATTR(ALLOC_REFILL, alloc_refill);
+STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
 STAT_ATTR(FREE_SLAB, free_slab);
 STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
 STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
@@ -4678,6 +4841,7 @@ STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
 STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
 STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
 STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
+STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
 STAT_ATTR(ORDER_FALLBACK, order_fallback);
 STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
@@ -4730,6 +4894,7 @@ static struct attribute *slab_attrs[] = {
        &alloc_from_partial_attr.attr,
        &alloc_slab_attr.attr,
        &alloc_refill_attr.attr,
+       &alloc_node_mismatch_attr.attr,
        &free_slab_attr.attr,
        &cpuslab_flush_attr.attr,
        &deactivate_full_attr.attr,
@@ -4737,6 +4902,7 @@ static struct attribute *slab_attrs[] = {
        &deactivate_to_head_attr.attr,
        &deactivate_to_tail_attr.attr,
        &deactivate_remote_frees_attr.attr,
+       &deactivate_bypass_attr.attr,
        &order_fallback_attr.attr,
        &cmpxchg_double_fail_attr.attr,
        &cmpxchg_double_cpu_fail_attr.attr,
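
Like the existing counters, the two new STAT_ATTR entries surface as per-cache files under /sys/kernel/slab/<cache>/ when the kernel is built with CONFIG_SLUB_STATS. A minimal userspace reader, with the cache name picked purely as an example:

#include <stdio.h>

int main(void)
{
	char buf[256];
	/* e.g. how often kmalloc-64 allocations missed the requested node */
	FILE *f = fopen("/sys/kernel/slab/kmalloc-64/alloc_node_mismatch", "r");

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("alloc_node_mismatch: %s", buf);
	fclose(f);
	return 0;
}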