mm: page_alloc: remove trailing whitespace
[linux-2.6.git] / mm / kmemleak.c
index 1d7645b..45eb621 100644 (file)
 #include <linux/sched.h>
 #include <linux/jiffies.h>
 #include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kthread.h>
 #include <linux/prio_tree.h>
-#include <linux/gfp.h>
 #include <linux/fs.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/string.h>
 #include <linux/nodemask.h>
 #include <linux/mm.h>
+#include <linux/workqueue.h>
+#include <linux/crc32.h>
 
 #include <asm/sections.h>
 #include <asm/processor.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 
+#include <linux/kmemcheck.h>
 #include <linux/kmemleak.h>
+#include <linux/memory_hotplug.h>
 
 /*
  * Kmemleak configuration and common defines.
 #define MSECS_MIN_AGE          5000    /* minimum object age for reporting */
 #define SECS_FIRST_SCAN                60      /* delay before the first scan */
 #define SECS_SCAN_WAIT         600     /* subsequent auto scanning delay */
-#define GRAY_LIST_PASSES       25      /* maximum number of gray list scans */
 #define MAX_SCAN_SIZE          4096    /* maximum size of a scanned block */
 
 #define BYTES_PER_POINTER      sizeof(void *)
 
 /* GFP bitmask for kmemleak internal allocations */
-#define GFP_KMEMLEAK_MASK      (GFP_KERNEL | GFP_ATOMIC)
+#define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
+                                __GFP_NORETRY | __GFP_NOMEMALLOC | \
+                                __GFP_NOWARN)
 
 /* scanning area inside a memory block */
 struct kmemleak_scan_area {
        struct hlist_node node;
-       unsigned long offset;
-       size_t length;
+       unsigned long start;
+       size_t size;
 };
 
+#define KMEMLEAK_GREY  0
+#define KMEMLEAK_BLACK -1
+
 /*
  * Structure holding the metadata for each allocated memory block.
  * Modifications to such objects should be made while holding the
@@ -144,6 +151,8 @@ struct kmemleak_object {
        int min_count;
        /* the total number of pointers found pointing to this object */
        int count;
+       /* checksum for detecting modified objects */
+       u32 checksum;
        /* memory ranges to be scanned inside an object (empty for all) */
        struct hlist_head area_list;
        unsigned long trace[MAX_TRACE];
@@ -159,8 +168,6 @@ struct kmemleak_object {
 #define OBJECT_REPORTED                (1 << 1)
 /* flag set to not scan the object */
 #define OBJECT_NO_SCAN         (1 << 2)
-/* flag set on newly allocated objects */
-#define OBJECT_NEW             (1 << 3)
 
 /* number of bytes to print per line; must be 16 or 32 */
 #define HEX_ROW_SIZE           16
@@ -190,7 +197,9 @@ static atomic_t kmemleak_enabled = ATOMIC_INIT(0);
 static atomic_t kmemleak_initialized = ATOMIC_INIT(0);
 /* enables or disables early logging of the memory operations */
 static atomic_t kmemleak_early_log = ATOMIC_INIT(1);
-/* set if a fata kmemleak error has occurred */
+/* set if a kmemleak warning was issued */
+static atomic_t kmemleak_warning = ATOMIC_INIT(0);
+/* set if a fatal kmemleak error has occurred */
 static atomic_t kmemleak_error = ATOMIC_INIT(0);
 
 /* minimum and maximum address that may be valid pointers */
@@ -207,6 +216,9 @@ static signed long jiffies_scan_wait;
 static int kmemleak_stack_scan = 1;
 /* protects the memory scanning, parameters and debug/kmemleak file access */
 static DEFINE_MUTEX(scan_mutex);
+/* set when kmemleak=on is given, so that the disable is skipped */
+static int kmemleak_skip_disable;
+
 
 /*
  * Early object allocation/freeing logging. Kmemleak is initialized after the
@@ -219,8 +231,10 @@ static DEFINE_MUTEX(scan_mutex);
 /* kmemleak operation type for early logging */
 enum {
        KMEMLEAK_ALLOC,
+       KMEMLEAK_ALLOC_PERCPU,
        KMEMLEAK_FREE,
        KMEMLEAK_FREE_PART,
+       KMEMLEAK_FREE_PERCPU,
        KMEMLEAK_NOT_LEAK,
        KMEMLEAK_IGNORE,
        KMEMLEAK_SCAN_AREA,
@@ -236,8 +250,6 @@ struct early_log {
        const void *ptr;                /* allocated/freed memory block */
        size_t size;                    /* memory block size */
        int min_count;                  /* minimum reference count */
-       unsigned long offset;           /* scan area offset */
-       size_t length;                  /* scan area length */
        unsigned long trace[MAX_TRACE]; /* stack trace */
        unsigned int trace_len;         /* stack trace length */
 };
@@ -252,13 +264,14 @@ static void kmemleak_disable(void);
 /*
  * Print a warning and dump the stack trace.
  */
-#define kmemleak_warn(x...)    do {    \
-       pr_warning(x);                  \
-       dump_stack();                   \
+#define kmemleak_warn(x...)    do {            \
+       pr_warning(x);                          \
+       dump_stack();                           \
+       atomic_set(&kmemleak_warning, 1);       \
 } while (0)
 
 /*
- * Macro invoked when a serious kmemleak condition occured and cannot be
+ * Macro invoked when a serious kmemleak condition occurred and cannot be
  * recovered from. Kmemleak will be disabled and further allocation/freeing
  * tracing no longer available.
  */
@@ -306,19 +319,16 @@ static void hex_dump_object(struct seq_file *seq,
  * Newly created objects don't have any color assigned (object->count == -1)
  * before the next memory scan when they become white.
  */
-static int color_white(const struct kmemleak_object *object)
-{
-       return object->count != -1 && object->count < object->min_count;
-}
-
-static int color_gray(const struct kmemleak_object *object)
+static bool color_white(const struct kmemleak_object *object)
 {
-       return object->min_count != -1 && object->count >= object->min_count;
+       return object->count != KMEMLEAK_BLACK &&
+               object->count < object->min_count;
 }
 
-static int color_black(const struct kmemleak_object *object)
+static bool color_gray(const struct kmemleak_object *object)
 {
-       return object->min_count == -1;
+       return object->min_count != KMEMLEAK_BLACK &&
+               object->count >= object->min_count;
 }
 
 /*
@@ -326,9 +336,9 @@ static int color_black(const struct kmemleak_object *object)
  * not be deleted and have a minimum age to avoid false positives caused by
  * pointers temporarily stored in CPU registers.
  */
-static int unreferenced_object(struct kmemleak_object *object)
+static bool unreferenced_object(struct kmemleak_object *object)
 {
-       return (object->flags & OBJECT_ALLOCATED) && color_white(object) &&
+       return (color_white(object) && object->flags & OBJECT_ALLOCATED) &&
                time_before_eq(object->jiffies + jiffies_min_age,
                               jiffies_last_scan);
 }
@@ -341,11 +351,13 @@ static void print_unreferenced(struct seq_file *seq,
                               struct kmemleak_object *object)
 {
        int i;
+       unsigned int msecs_age = jiffies_to_msecs(jiffies - object->jiffies);
 
        seq_printf(seq, "unreferenced object 0x%08lx (size %zu):\n",
                   object->pointer, object->size);
-       seq_printf(seq, "  comm \"%s\", pid %d, jiffies %lu\n",
-                  object->comm, object->pid, object->jiffies);
+       seq_printf(seq, "  comm \"%s\", pid %d, jiffies %lu (age %d.%03ds)\n",
+                  object->comm, object->pid, object->jiffies,
+                  msecs_age / 1000, msecs_age % 1000);
        hex_dump_object(seq, object);
        seq_printf(seq, "  backtrace:\n");
 
@@ -374,6 +386,7 @@ static void dump_object_info(struct kmemleak_object *object)
        pr_notice("  min_count = %d\n", object->min_count);
        pr_notice("  count = %d\n", object->count);
        pr_notice("  flags = 0x%lx\n", object->flags);
+       pr_notice("  checksum = %d\n", object->checksum);
        pr_notice("  backtrace:\n");
        print_stack_trace(&trace, 4);
 }
@@ -396,7 +409,9 @@ static struct kmemleak_object *lookup_object(unsigned long ptr, int alias)
                object = prio_tree_entry(node, struct kmemleak_object,
                                         tree_node);
                if (!alias && object->pointer != ptr) {
-                       kmemleak_warn("Found object by alias");
+                       kmemleak_warn("Found object by alias at 0x%08lx\n",
+                                     ptr);
+                       dump_object_info(object);
                        object = NULL;
                }
        } else
@@ -504,9 +519,10 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
        struct kmemleak_object *object;
        struct prio_tree_node *node;
 
-       object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK);
+       object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
        if (!object) {
-               kmemleak_stop("Cannot allocate a kmemleak_object structure\n");
+               pr_warning("Cannot allocate a kmemleak_object structure\n");
+               kmemleak_disable();
                return NULL;
        }
 
@@ -515,12 +531,13 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
        INIT_HLIST_HEAD(&object->area_list);
        spin_lock_init(&object->lock);
        atomic_set(&object->use_count, 1);
-       object->flags = OBJECT_ALLOCATED | OBJECT_NEW;
+       object->flags = OBJECT_ALLOCATED;
        object->pointer = ptr;
        object->size = size;
        object->min_count = min_count;
-       object->count = -1;                     /* no color initially */
+       object->count = 0;                      /* white color initially */
        object->jiffies = jiffies;
+       object->checksum = 0;
 
        /* task information */
        if (in_irq()) {
@@ -548,6 +565,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
        object->tree_node.last = ptr + size - 1;
 
        write_lock_irqsave(&kmemleak_lock, flags);
+
        min_addr = min(min_addr, ptr);
        max_addr = max(max_addr, ptr + size);
        node = prio_tree_insert(&object_tree_root, &object->tree_node);
@@ -558,14 +576,12 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
         * random memory blocks.
         */
        if (node != &object->tree_node) {
-               unsigned long flags;
-
                kmemleak_stop("Cannot insert 0x%lx into the object search tree "
                              "(already existing)\n", ptr);
                object = lookup_object(ptr, 1);
-               spin_lock_irqsave(&object->lock, flags);
+               spin_lock(&object->lock);
                dump_object_info(object);
-               spin_unlock_irqrestore(&object->lock, flags);
+               spin_unlock(&object->lock);
 
                goto out;
        }
@@ -659,75 +675,82 @@ static void delete_object_part(unsigned long ptr, size_t size)
 
        put_object(object);
 }
-/*
- * Make a object permanently as gray-colored so that it can no longer be
- * reported as a leak. This is used in general to mark a false positive.
- */
-static void make_gray_object(unsigned long ptr)
+
+static void __paint_it(struct kmemleak_object *object, int color)
+{
+       object->min_count = color;
+       if (color == KMEMLEAK_BLACK)
+               object->flags |= OBJECT_NO_SCAN;
+}
+
+static void paint_it(struct kmemleak_object *object, int color)
 {
        unsigned long flags;
+
+       spin_lock_irqsave(&object->lock, flags);
+       __paint_it(object, color);
+       spin_unlock_irqrestore(&object->lock, flags);
+}
+
+static void paint_ptr(unsigned long ptr, int color)
+{
        struct kmemleak_object *object;
 
        object = find_and_get_object(ptr, 0);
        if (!object) {
-               kmemleak_warn("Graying unknown object at 0x%08lx\n", ptr);
+               kmemleak_warn("Trying to color unknown object "
+                             "at 0x%08lx as %s\n", ptr,
+                             (color == KMEMLEAK_GREY) ? "Grey" :
+                             (color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
                return;
        }
-
-       spin_lock_irqsave(&object->lock, flags);
-       object->min_count = 0;
-       spin_unlock_irqrestore(&object->lock, flags);
+       paint_it(object, color);
        put_object(object);
 }
 
 /*
+ * Mark an object permanently as gray-colored so that it can no longer be
+ * reported as a leak. This is used in general to mark a false positive.
+ */
+static void make_gray_object(unsigned long ptr)
+{
+       paint_ptr(ptr, KMEMLEAK_GREY);
+}
+
+/*
  * Mark the object as black-colored so that it is ignored from scans and
  * reporting.
  */
 static void make_black_object(unsigned long ptr)
 {
-       unsigned long flags;
-       struct kmemleak_object *object;
-
-       object = find_and_get_object(ptr, 0);
-       if (!object) {
-               kmemleak_warn("Blacking unknown object at 0x%08lx\n", ptr);
-               return;
-       }
-
-       spin_lock_irqsave(&object->lock, flags);
-       object->min_count = -1;
-       object->flags |= OBJECT_NO_SCAN;
-       spin_unlock_irqrestore(&object->lock, flags);
-       put_object(object);
+       paint_ptr(ptr, KMEMLEAK_BLACK);
 }
 
 /*
  * Add a scanning area to the object. If at least one such area is added,
  * kmemleak will only scan these ranges rather than the whole memory block.
  */
-static void add_scan_area(unsigned long ptr, unsigned long offset,
-                         size_t length, gfp_t gfp)
+static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
 {
        unsigned long flags;
        struct kmemleak_object *object;
        struct kmemleak_scan_area *area;
 
-       object = find_and_get_object(ptr, 0);
+       object = find_and_get_object(ptr, 1);
        if (!object) {
                kmemleak_warn("Adding scan area to unknown object at 0x%08lx\n",
                              ptr);
                return;
        }
 
-       area = kmem_cache_alloc(scan_area_cache, gfp & GFP_KMEMLEAK_MASK);
+       area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
        if (!area) {
-               kmemleak_warn("Cannot allocate a scan area\n");
+               pr_warning("Cannot allocate a scan area\n");
                goto out;
        }
 
        spin_lock_irqsave(&object->lock, flags);
-       if (offset + length > object->size) {
+       if (ptr + size > object->pointer + object->size) {
                kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr);
                dump_object_info(object);
                kmem_cache_free(scan_area_cache, area);
@@ -735,8 +758,8 @@ static void add_scan_area(unsigned long ptr, unsigned long offset,
        }
 
        INIT_HLIST_NODE(&area->node);
-       area->offset = offset;
-       area->length = length;
+       area->start = ptr;
+       area->size = size;
 
        hlist_add_head(&area->node, &object->area_list);
 out_unlock:
@@ -772,13 +795,18 @@ static void object_no_scan(unsigned long ptr)
  * processed later once kmemleak is fully initialized.
  */
 static void __init log_early(int op_type, const void *ptr, size_t size,
-                            int min_count, unsigned long offset, size_t length)
+                            int min_count)
 {
        unsigned long flags;
        struct early_log *log;
 
+       if (atomic_read(&kmemleak_error)) {
+               /* kmemleak stopped recording, just count the requests */
+               crt_early_log++;
+               return;
+       }
+
        if (crt_early_log >= ARRAY_SIZE(early_log)) {
-               pr_warning("Early log buffer exceeded\n");
                kmemleak_disable();
                return;
        }
@@ -793,10 +821,7 @@ static void __init log_early(int op_type, const void *ptr, size_t size,
        log->ptr = ptr;
        log->size = size;
        log->min_count = min_count;
-       log->offset = offset;
-       log->length = length;
-       if (op_type == KMEMLEAK_ALLOC)
-               log->trace_len = __save_stack_trace(log->trace);
+       log->trace_len = __save_stack_trace(log->trace);
        crt_early_log++;
        local_irq_restore(flags);
 }
@@ -818,19 +843,45 @@ static void early_alloc(struct early_log *log)
         */
        rcu_read_lock();
        object = create_object((unsigned long)log->ptr, log->size,
-                              log->min_count, GFP_KERNEL);
+                              log->min_count, GFP_ATOMIC);
+       if (!object)
+               goto out;
        spin_lock_irqsave(&object->lock, flags);
        for (i = 0; i < log->trace_len; i++)
                object->trace[i] = log->trace[i];
        object->trace_len = log->trace_len;
        spin_unlock_irqrestore(&object->lock, flags);
+out:
        rcu_read_unlock();
 }
 
 /*
- * Memory allocation function callback. This function is called from the
- * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc,
- * vmalloc etc.).
+ * Log an early allocated __percpu block, once for each possible CPU.
+ */
+static void early_alloc_percpu(struct early_log *log)
+{
+       unsigned int cpu;
+       const void __percpu *ptr = log->ptr;
+
+       for_each_possible_cpu(cpu) {
+               log->ptr = per_cpu_ptr(ptr, cpu);
+               early_alloc(log);
+       }
+}
+
+/**
+ * kmemleak_alloc - register a newly allocated object
+ * @ptr:       pointer to beginning of the object
+ * @size:      size of the object
+ * @min_count: minimum number of references to this object. If during memory
+ *             scanning a number of references less than @min_count is found,
+ *             the object is reported as a memory leak. If @min_count is 0,
+ *             the object is never reported as a leak. If @min_count is -1,
+ *             the object is ignored (not scanned and not reported as a leak)
+ * @gfp:       kmalloc() flags used for kmemleak internal memory allocations
+ *
+ * This function is called from the kernel allocators when a new object
+ * (memory block) is allocated (kmem_cache_alloc, kmalloc, vmalloc etc.).
  */
 void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
                          gfp_t gfp)
@@ -840,13 +891,44 @@ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
        if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
                create_object((unsigned long)ptr, size, min_count, gfp);
        else if (atomic_read(&kmemleak_early_log))
-               log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0);
+               log_early(KMEMLEAK_ALLOC, ptr, size, min_count);
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc);
 
-/*
- * Memory freeing function callback. This function is called from the kernel
- * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.).
+/**
+ * kmemleak_alloc_percpu - register a newly allocated __percpu object
+ * @ptr:       __percpu pointer to beginning of the object
+ * @size:      size of the object
+ *
+ * This function is called from the kernel percpu allocator when a new object
+ * (memory block) is allocated (alloc_percpu). It assumes GFP_KERNEL
+ * allocation.
+ */
+void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size)
+{
+       unsigned int cpu;
+
+       pr_debug("%s(0x%p, %zu)\n", __func__, ptr, size);
+
+       /*
+        * Percpu allocations are only scanned and not reported as leaks
+        * (min_count is set to 0).
+        */
+       if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+               for_each_possible_cpu(cpu)
+                       create_object((unsigned long)per_cpu_ptr(ptr, cpu),
+                                     size, 0, GFP_KERNEL);
+       else if (atomic_read(&kmemleak_early_log))
+               log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
+
+/**
+ * kmemleak_free - unregister a previously registered object
+ * @ptr:       pointer to beginning of the object
+ *
+ * This function is called from the kernel allocators when an object (memory
+ * block) is freed (kmem_cache_free, kfree, vfree etc.).
  */
 void __ref kmemleak_free(const void *ptr)
 {
@@ -855,13 +937,18 @@ void __ref kmemleak_free(const void *ptr)
        if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
                delete_object_full((unsigned long)ptr);
        else if (atomic_read(&kmemleak_early_log))
-               log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
+               log_early(KMEMLEAK_FREE, ptr, 0, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free);
 
-/*
- * Partial memory freeing function callback. This function is usually called
- * from bootmem allocator when (part of) a memory block is freed.
+/**
+ * kmemleak_free_part - partially unregister a previously registered object
+ * @ptr:       pointer to the beginning or inside the object. This also
+ *             represents the start of the range to be freed
+ * @size:      size to be unregistered
+ *
+ * This function is called when only a part of a memory block is freed
+ * (usually from the bootmem allocator).
  */
 void __ref kmemleak_free_part(const void *ptr, size_t size)
 {
@@ -870,13 +957,38 @@ void __ref kmemleak_free_part(const void *ptr, size_t size)
        if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
                delete_object_part((unsigned long)ptr, size);
        else if (atomic_read(&kmemleak_early_log))
-               log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0);
+               log_early(KMEMLEAK_FREE_PART, ptr, size, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free_part);
 
-/*
- * Mark an already allocated memory block as a false positive. This will cause
- * the block to no longer be reported as leak and always be scanned.
+/**
+ * kmemleak_free_percpu - unregister a previously registered __percpu object
+ * @ptr:       __percpu pointer to beginning of the object
+ *
+ * This function is called from the kernel percpu allocator when an object
+ * (memory block) is freed (free_percpu).
+ */
+void __ref kmemleak_free_percpu(const void __percpu *ptr)
+{
+       unsigned int cpu;
+
+       pr_debug("%s(0x%p)\n", __func__, ptr);
+
+       if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+               for_each_possible_cpu(cpu)
+                       delete_object_full((unsigned long)per_cpu_ptr(ptr,
+                                                                     cpu));
+       else if (atomic_read(&kmemleak_early_log))
+               log_early(KMEMLEAK_FREE_PERCPU, ptr, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
+
+/**
+ * kmemleak_not_leak - mark an allocated object as false positive
+ * @ptr:       pointer to beginning of the object
+ *
+ * Calling this function on an object will cause the memory block to no longer
+ * be reported as leak and always be scanned.
  */
 void __ref kmemleak_not_leak(const void *ptr)
 {
@@ -885,14 +997,18 @@ void __ref kmemleak_not_leak(const void *ptr)
        if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
                make_gray_object((unsigned long)ptr);
        else if (atomic_read(&kmemleak_early_log))
-               log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0);
+               log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_not_leak);
 
-/*
- * Ignore a memory block. This is usually done when it is known that the
- * corresponding block is not a leak and does not contain any references to
- * other allocated memory blocks.
+/**
+ * kmemleak_ignore - ignore an allocated object
+ * @ptr:       pointer to beginning of the object
+ *
+ * Calling this function on an object will cause the memory block to be
+ * ignored (not scanned and not reported as a leak). This is usually done when
+ * it is known that the corresponding block is not a leak and does not contain
+ * any references to other allocated memory blocks.
  */
 void __ref kmemleak_ignore(const void *ptr)
 {
@@ -901,27 +1017,40 @@ void __ref kmemleak_ignore(const void *ptr)
        if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
                make_black_object((unsigned long)ptr);
        else if (atomic_read(&kmemleak_early_log))
-               log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0);
+               log_early(KMEMLEAK_IGNORE, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_ignore);
 
-/*
- * Limit the range to be scanned in an allocated memory block.
+/**
+ * kmemleak_scan_area - limit the range to be scanned in an allocated object
+ * @ptr:       pointer to beginning or inside the object. This also
+ *             represents the start of the scan area
+ * @size:      size of the scan area
+ * @gfp:       kmalloc() flags used for kmemleak internal memory allocations
+ *
+ * This function is used when it is known that only certain parts of an object
+ * contain references to other objects. Kmemleak will only scan these areas
+ * reducing the number of false negatives.
  */
-void __ref kmemleak_scan_area(const void *ptr, unsigned long offset,
-                             size_t length, gfp_t gfp)
+void __ref kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
 {
        pr_debug("%s(0x%p)\n", __func__, ptr);
 
-       if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
-               add_scan_area((unsigned long)ptr, offset, length, gfp);
+       if (atomic_read(&kmemleak_enabled) && ptr && size && !IS_ERR(ptr))
+               add_scan_area((unsigned long)ptr, size, gfp);
        else if (atomic_read(&kmemleak_early_log))
-               log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length);
+               log_early(KMEMLEAK_SCAN_AREA, ptr, size, 0);
 }
 EXPORT_SYMBOL(kmemleak_scan_area);
 
-/*
- * Inform kmemleak not to scan the given memory block.
+/**
+ * kmemleak_no_scan - do not scan an allocated object
+ * @ptr:       pointer to beginning of the object
+ *
+ * This function notifies kmemleak not to scan the given memory block. Useful
+ * in situations where it is known that the given object does not contain any
+ * references to other objects. Kmemleak will not scan such objects reducing
+ * the number of false negatives.
  */
 void __ref kmemleak_no_scan(const void *ptr)
 {
@@ -930,13 +1059,27 @@ void __ref kmemleak_no_scan(const void *ptr)
        if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
                object_no_scan((unsigned long)ptr);
        else if (atomic_read(&kmemleak_early_log))
-               log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0);
+               log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_no_scan);
 
 /*
+ * Update an object's checksum and return true if it was modified.
+ */
+static bool update_checksum(struct kmemleak_object *object)
+{
+       u32 old_csum = object->checksum;
+
+       if (!kmemcheck_is_obj_initialized(object->pointer, object->size))
+               return false;
+
+       object->checksum = crc32(0, (void *)object->pointer, object->size);
+       return object->checksum != old_csum;
+}
+
+/*
  * Memory scanning is a long process and it needs to be interruptable. This
- * function checks whether such interrupt condition occured.
+ * function checks whether such interrupt condition occurred.
  */
 static int scan_should_stop(void)
 {
@@ -967,15 +1110,22 @@ static void scan_block(void *_start, void *_end,
        unsigned long *end = _end - (BYTES_PER_POINTER - 1);
 
        for (ptr = start; ptr < end; ptr++) {
-               unsigned long flags;
-               unsigned long pointer = *ptr;
                struct kmemleak_object *object;
+               unsigned long flags;
+               unsigned long pointer;
 
                if (allow_resched)
                        cond_resched();
                if (scan_should_stop())
                        break;
 
+               /* don't scan uninitialized memory */
+               if (!kmemcheck_is_obj_initialized((unsigned long)ptr,
+                                                 BYTES_PER_POINTER))
+                       continue;
+
+               pointer = *ptr;
+
                object = find_and_get_object(pointer, 1);
                if (!object)
                        continue;
@@ -1006,11 +1156,14 @@ static void scan_block(void *_start, void *_end,
                 * added to the gray_list.
                 */
                object->count++;
-               if (color_gray(object))
+               if (color_gray(object)) {
                        list_add_tail(&object->gray_list, &gray_list);
-               else
-                       put_object(object);
+                       spin_unlock_irqrestore(&object->lock, flags);
+                       continue;
+               }
+
                spin_unlock_irqrestore(&object->lock, flags);
+               put_object(object);
        }
 }
 
@@ -1025,8 +1178,8 @@ static void scan_object(struct kmemleak_object *object)
        unsigned long flags;
 
        /*
-        * Once the object->lock is aquired, the corresponding memory block
-        * cannot be freed (the same lock is aquired in delete_object).
+        * Once the object->lock is acquired, the corresponding memory block
+        * cannot be freed (the same lock is acquired in delete_object).
         */
        spin_lock_irqsave(&object->lock, flags);
        if (object->flags & OBJECT_NO_SCAN)
@@ -1050,14 +1203,47 @@ static void scan_object(struct kmemleak_object *object)
                }
        } else
                hlist_for_each_entry(area, elem, &object->area_list, node)
-                       scan_block((void *)(object->pointer + area->offset),
-                                  (void *)(object->pointer + area->offset
-                                           + area->length), object, 0);
+                       scan_block((void *)area->start,
+                                  (void *)(area->start + area->size),
+                                  object, 0);
 out:
        spin_unlock_irqrestore(&object->lock, flags);
 }
 
 /*
+ * Scan the objects already referenced (gray objects). More objects will be
+ * referenced and, if there are no memory leaks, all the objects are scanned.
+ */
+static void scan_gray_list(void)
+{
+       struct kmemleak_object *object, *tmp;
+
+       /*
+        * The list traversal is safe for both tail additions and removals
+        * from inside the loop. The kmemleak objects cannot be freed from
+        * outside the loop because their use_count was incremented.
+        */
+       object = list_entry(gray_list.next, typeof(*object), gray_list);
+       while (&object->gray_list != &gray_list) {
+               cond_resched();
+
+               /* may add new objects to the list */
+               if (!scan_should_stop())
+                       scan_object(object);
+
+               tmp = list_entry(object->gray_list.next, typeof(*object),
+                                gray_list);
+
+               /* remove the object from the list and release it */
+               list_del(&object->gray_list);
+               put_object(object);
+
+               object = tmp;
+       }
+       WARN_ON(!list_empty(&gray_list));
+}
+
+/*
  * Scan data sections and all the referenced memory blocks allocated via the
  * kernel's standard allocators. This function must be called with the
  * scan_mutex held.
@@ -1065,11 +1251,9 @@ out:
 static void kmemleak_scan(void)
 {
        unsigned long flags;
-       struct kmemleak_object *object, *tmp;
-       struct task_struct *task;
+       struct kmemleak_object *object;
        int i;
        int new_leaks = 0;
-       int gray_list_pass = 0;
 
        jiffies_last_scan = jiffies;
 
@@ -1090,7 +1274,6 @@ static void kmemleak_scan(void)
 #endif
                /* reset the reference count (whiten the object) */
                object->count = 0;
-               object->flags &= ~OBJECT_NEW;
                if (color_gray(object) && get_object(object))
                        list_add_tail(&object->gray_list, &gray_list);
 
@@ -1110,9 +1293,9 @@ static void kmemleak_scan(void)
 #endif
 
        /*
-        * Struct page scanning for each node. The code below is not yet safe
-        * with MEMORY_HOTPLUG.
+        * Struct page scanning for each node.
         */
+       lock_memory_hotplug();
        for_each_online_node(i) {
                pg_data_t *pgdat = NODE_DATA(i);
                unsigned long start_pfn = pgdat->node_start_pfn;
@@ -1131,78 +1314,54 @@ static void kmemleak_scan(void)
                        scan_block(page, page + 1, NULL, 1);
                }
        }
+       unlock_memory_hotplug();
 
        /*
-        * Scanning the task stacks may introduce false negatives and it is
-        * not enabled by default.
+        * Scanning the task stacks (may introduce false negatives).
         */
        if (kmemleak_stack_scan) {
+               struct task_struct *p, *g;
+
                read_lock(&tasklist_lock);
-               for_each_process(task)
-                       scan_block(task_stack_page(task),
-                                  task_stack_page(task) + THREAD_SIZE,
-                                  NULL, 0);
+               do_each_thread(g, p) {
+                       scan_block(task_stack_page(p), task_stack_page(p) +
+                                  THREAD_SIZE, NULL, 0);
+               } while_each_thread(g, p);
                read_unlock(&tasklist_lock);
        }
 
        /*
         * Scan the objects already referenced from the sections scanned
-        * above. More objects will be referenced and, if there are no memory
-        * leaks, all the objects will be scanned. The list traversal is safe
-        * for both tail additions and removals from inside the loop. The
-        * kmemleak objects cannot be freed from outside the loop because their
-        * use_count was increased.
+        * above.
         */
-repeat:
-       object = list_entry(gray_list.next, typeof(*object), gray_list);
-       while (&object->gray_list != &gray_list) {
-               cond_resched();
-
-               /* may add new objects to the list */
-               if (!scan_should_stop())
-                       scan_object(object);
-
-               tmp = list_entry(object->gray_list.next, typeof(*object),
-                                gray_list);
-
-               /* remove the object from the list and release it */
-               list_del(&object->gray_list);
-               put_object(object);
-
-               object = tmp;
-       }
-
-       if (scan_should_stop() || ++gray_list_pass >= GRAY_LIST_PASSES)
-               goto scan_end;
+       scan_gray_list();
 
        /*
-        * Check for new objects allocated during this scanning and add them
-        * to the gray list.
+        * Check for new or unreferenced objects modified since the previous
+        * scan and color them gray until the next scan.
         */
        rcu_read_lock();
        list_for_each_entry_rcu(object, &object_list, object_list) {
                spin_lock_irqsave(&object->lock, flags);
-               if ((object->flags & OBJECT_NEW) && !color_black(object) &&
-                   get_object(object)) {
-                       object->flags &= ~OBJECT_NEW;
+               if (color_white(object) && (object->flags & OBJECT_ALLOCATED)
+                   && update_checksum(object) && get_object(object)) {
+                       /* color it gray temporarily */
+                       object->count = object->min_count;
                        list_add_tail(&object->gray_list, &gray_list);
                }
                spin_unlock_irqrestore(&object->lock, flags);
        }
        rcu_read_unlock();
 
-       if (!list_empty(&gray_list))
-               goto repeat;
-
-scan_end:
-       WARN_ON(!list_empty(&gray_list));
+       /*
+        * Re-scan the gray list for modified unreferenced objects.
+        */
+       scan_gray_list();
 
        /*
-        * If scanning was stopped or new objects were being allocated at a
-        * higher rate than gray list scanning, do not report any new
-        * unreferenced objects.
+        * If scanning was stopped do not report any new unreferenced objects.
         */
-       if (scan_should_stop() || gray_list_pass >= GRAY_LIST_PASSES)
+       if (scan_should_stop())
                return;
 
        /*
@@ -1266,7 +1425,7 @@ static int kmemleak_scan_thread(void *arg)
  * Start the automatic memory scanning thread. This function must be called
  * with the scan_mutex held.
  */
-void start_scan_thread(void)
+static void start_scan_thread(void)
 {
        if (scan_thread)
                return;
@@ -1281,7 +1440,7 @@ void start_scan_thread(void)
  * Stop the automatic memory scanning thread. This function must be called
  * with the scan_mutex held.
  */
-void stop_scan_thread(void)
+static void stop_scan_thread(void)
 {
        if (scan_thread) {
                kthread_stop(scan_thread);
@@ -1329,9 +1488,12 @@ static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos)
        ++(*pos);
 
        list_for_each_continue_rcu(n, &object_list) {
-               next_obj = list_entry(n, struct kmemleak_object, object_list);
-               if (get_object(next_obj))
+               struct kmemleak_object *obj =
+                       list_entry(n, struct kmemleak_object, object_list);
+               if (get_object(obj)) {
+                       next_obj = obj;
                        break;
+               }
        }
 
        put_object(prev_obj);
@@ -1379,9 +1541,6 @@ static const struct seq_operations kmemleak_seq_ops = {
 
 static int kmemleak_open(struct inode *inode, struct file *file)
 {
-       if (!atomic_read(&kmemleak_enabled))
-               return -EBUSY;
-
        return seq_open(file, &kmemleak_seq_ops);
 }
 
@@ -1412,6 +1571,28 @@ static int dump_str_object_info(const char *str)
 }
 
 /*
+ * Paint the reported objects that are still unreferenced grey, so that they
+ * stop being listed. Grey is used instead of black to keep scanning them in
+ * future: a black object could later hold the only reference to a newly
+ * allocated object, which would then show up as a false positive.
+ */
+static void kmemleak_clear(void)
+{
+       struct kmemleak_object *object;
+       unsigned long flags;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(object, &object_list, object_list) {
+               spin_lock_irqsave(&object->lock, flags);
+               if ((object->flags & OBJECT_REPORTED) &&
+                   unreferenced_object(object))
+                       __paint_it(object, KMEMLEAK_GREY);
+               spin_unlock_irqrestore(&object->lock, flags);
+       }
+       rcu_read_unlock();
+}
+
+/*
  * File write operation to configure kmemleak at run-time. The following
  * commands can be written to the /sys/kernel/debug/kmemleak file:
  *   off       - disable kmemleak (irreversible)
@@ -1422,6 +1603,8 @@ static int dump_str_object_info(const char *str)
  *   scan=...  - set the automatic memory scanning period in seconds (0 to
  *               disable it)
  *   scan      - trigger a memory scan
+ *   clear     - mark all currently reported unreferenced kmemleak objects as
+ *               grey so that they are no longer printed
  *   dump=...  - dump information about the object found at the given address
  */
 static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
@@ -1431,6 +1614,9 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
        int buf_size;
        int ret;
 
+       if (!atomic_read(&kmemleak_enabled))
+               return -EBUSY;
+
        buf_size = min(size, (sizeof(buf) - 1));
        if (strncpy_from_user(buf, user_buf, buf_size) < 0)
                return -EFAULT;
@@ -1463,6 +1649,8 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
                }
        } else if (strncmp(buf, "scan", 4) == 0)
                kmemleak_scan();
+       else if (strncmp(buf, "clear", 5) == 0)
+               kmemleak_clear();
        else if (strncmp(buf, "dump=", 5) == 0)
                ret = dump_str_object_info(buf + 5);
        else
@@ -1488,37 +1676,28 @@ static const struct file_operations kmemleak_fops = {
 };
 
 /*
- * Perform the freeing of the kmemleak internal objects after waiting for any
- * current memory scan to complete.
+ * Deferred clean-up work: stop the memory scanning thread and, only if no
+ * scan thread was running on entry, free the kmemleak internal objects
+ * (otherwise, kmemleak may still have some useful information on leaks).
  */
-static int kmemleak_cleanup_thread(void *arg)
+static void kmemleak_do_cleanup(struct work_struct *work)
 {
        struct kmemleak_object *object;
+       bool cleanup = scan_thread == NULL; /* NOTE(review): unlocked read */
 
        mutex_lock(&scan_mutex);
        stop_scan_thread();
 
-       rcu_read_lock();
-       list_for_each_entry_rcu(object, &object_list, object_list)
-               delete_object_full(object->pointer);
-       rcu_read_unlock();
+       if (cleanup) {
+               rcu_read_lock();
+               list_for_each_entry_rcu(object, &object_list, object_list)
+                       delete_object_full(object->pointer);
+               rcu_read_unlock();
+       }
        mutex_unlock(&scan_mutex);
-
-       return 0;
 }
 
-/*
- * Start the clean-up thread.
- */
-static void kmemleak_cleanup(void)
-{
-       struct task_struct *cleanup_thread;
-
-       cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL,
-                                    "kmemleak-clean");
-       if (IS_ERR(cleanup_thread))
-               pr_warning("Failed to create the clean-up thread\n");
-}
+static DECLARE_WORK(cleanup_work, kmemleak_do_cleanup);
 
 /*
  * Disable kmemleak. No memory allocation/freeing will be traced once this
@@ -1531,12 +1710,11 @@ static void kmemleak_disable(void)
                return;
 
        /* stop any memory operation tracing */
-       atomic_set(&kmemleak_early_log, 0);
        atomic_set(&kmemleak_enabled, 0);
 
        /* check whether it is too early for a kernel thread */
        if (atomic_read(&kmemleak_initialized))
-               kmemleak_cleanup();
+               schedule_work(&cleanup_work);
 
        pr_info("Kernel memory leak detector disabled\n");
 }
@@ -1550,12 +1728,25 @@ static int kmemleak_boot_config(char *str)
                return -EINVAL;
        if (strcmp(str, "off") == 0)
                kmemleak_disable();
-       else if (strcmp(str, "on") != 0)
+       else if (strcmp(str, "on") == 0)
+               kmemleak_skip_disable = 1;
+       else
                return -EINVAL;
        return 0;
 }
 early_param("kmemleak", kmemleak_boot_config);
 
+static void __init print_log_trace(struct early_log *log)
+{
+       struct stack_trace trace;       /* view of the trace kept in log */
+
+       trace.nr_entries = log->trace_len;
+       trace.entries = log->trace;
+
+       pr_notice("Early log backtrace:\n");
+       print_stack_trace(&trace, 2);
+}
+
 /*
  * Kmemleak initialization.
  */
@@ -1564,6 +1755,14 @@ void __init kmemleak_init(void)
        int i;
        unsigned long flags;
 
+#ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF
+       if (!kmemleak_skip_disable) {
+               atomic_set(&kmemleak_early_log, 0);
+               kmemleak_disable();
+               return;
+       }
+#endif
+
        jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
        jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
 
@@ -1571,12 +1770,18 @@ void __init kmemleak_init(void)
        scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);
        INIT_PRIO_TREE_ROOT(&object_tree_root);
 
+       if (crt_early_log >= ARRAY_SIZE(early_log))
+               pr_warning("Early log buffer exceeded (%d), please increase "
+                          "DEBUG_KMEMLEAK_EARLY_LOG_SIZE\n", crt_early_log);
+
        /* the kernel is still in UP mode, so disabling the IRQs is enough */
        local_irq_save(flags);
-       if (!atomic_read(&kmemleak_error)) {
+       atomic_set(&kmemleak_early_log, 0);
+       if (atomic_read(&kmemleak_error)) {
+               local_irq_restore(flags);
+               return;
+       } else
                atomic_set(&kmemleak_enabled, 1);
-               atomic_set(&kmemleak_early_log, 0);
-       }
        local_irq_restore(flags);
 
        /*
@@ -1591,12 +1796,18 @@ void __init kmemleak_init(void)
                case KMEMLEAK_ALLOC:
                        early_alloc(log);
                        break;
+               case KMEMLEAK_ALLOC_PERCPU:
+                       early_alloc_percpu(log);
+                       break;
                case KMEMLEAK_FREE:
                        kmemleak_free(log->ptr);
                        break;
                case KMEMLEAK_FREE_PART:
                        kmemleak_free_part(log->ptr, log->size);
                        break;
+               case KMEMLEAK_FREE_PERCPU:
+                       kmemleak_free_percpu(log->ptr);
+                       break;
                case KMEMLEAK_NOT_LEAK:
                        kmemleak_not_leak(log->ptr);
                        break;
@@ -1604,14 +1815,19 @@ void __init kmemleak_init(void)
                        kmemleak_ignore(log->ptr);
                        break;
                case KMEMLEAK_SCAN_AREA:
-                       kmemleak_scan_area(log->ptr, log->offset, log->length,
-                                          GFP_KERNEL);
+                       kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL);
                        break;
                case KMEMLEAK_NO_SCAN:
                        kmemleak_no_scan(log->ptr);
                        break;
                default:
-                       WARN_ON(1);
+                       kmemleak_warn("Unknown early log operation: %d\n",
+                                     log->op_type);
+               }
+
+               if (atomic_read(&kmemleak_warning)) {
+                       print_log_trace(log);
+                       atomic_set(&kmemleak_warning, 0);
                }
        }
 }
@@ -1627,12 +1843,12 @@ static int __init kmemleak_late_init(void)
 
        if (atomic_read(&kmemleak_error)) {
                /*
-                * Some error occured and kmemleak was disabled. There is a
+                * Some error occurred and kmemleak was disabled. There is a
                 * small chance that kmemleak_disable() was called immediately
                 * after setting kmemleak_initialized and we may end up with
                 * two clean-up threads but serialized by scan_mutex.
                 */
-               kmemleak_cleanup();
+               schedule_work(&cleanup_work);
                return -ENOMEM;
        }