arm: tegra: register save and restore ops
[linux-3.10.git] / kernel / lockdep.c
index 938dc50..e16c45b 100644 (file)
 #include <linux/ftrace.h>
 #include <linux/stringify.h>
 #include <linux/bitops.h>
+#include <linux/gfp.h>
+#include <linux/kmemcheck.h>
+
 #include <asm/sections.h>
 
 #include "lockdep_internals.h"
 
 #define CREATE_TRACE_POINTS
-#include <trace/events/lockdep.h>
+#include <trace/events/lock.h>
 
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
@@ -72,11 +75,11 @@ module_param(lock_stat, int, 0644);
  * to use a raw spinlock - we really dont want the spinlock
  * code to recurse back into the lockdep code...
  */
-static raw_spinlock_t lockdep_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
 static int graph_lock(void)
 {
-       __raw_spin_lock(&lockdep_lock);
+       arch_spin_lock(&lockdep_lock);
        /*
         * Make sure that if another CPU detected a bug while
         * walking the graph we dont change it (while the other
@@ -84,7 +87,7 @@ static int graph_lock(void)
         * dropped already)
         */
        if (!debug_locks) {
-               __raw_spin_unlock(&lockdep_lock);
+               arch_spin_unlock(&lockdep_lock);
                return 0;
        }
        /* prevent any recursions within lockdep from causing deadlocks */
@@ -94,11 +97,16 @@ static int graph_lock(void)
 
 static inline int graph_unlock(void)
 {
-       if (debug_locks && !__raw_spin_is_locked(&lockdep_lock))
+       if (debug_locks && !arch_spin_is_locked(&lockdep_lock)) {
+               /*
+                * The lockdep graph lock isn't locked while we expect it to
+                * be, we're confused now, bye!
+                */
                return DEBUG_LOCKS_WARN_ON(1);
+       }
 
        current->lockdep_recursion--;
-       __raw_spin_unlock(&lockdep_lock);
+       arch_spin_unlock(&lockdep_lock);
        return 0;
 }
 
@@ -110,7 +118,7 @@ static inline int debug_locks_off_graph_unlock(void)
 {
        int ret = debug_locks_off();
 
-       __raw_spin_unlock(&lockdep_lock);
+       arch_spin_unlock(&lockdep_lock);
 
        return ret;
 }
@@ -118,7 +126,7 @@ static inline int debug_locks_off_graph_unlock(void)
 static int lockdep_initialized;
 
 unsigned long nr_list_entries;
-struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
+static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
 
 /*
  * All data structures here are protected by the global debug_lock.
@@ -132,6 +140,9 @@ static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
 static inline struct lock_class *hlock_class(struct held_lock *hlock)
 {
        if (!hlock->class_idx) {
+               /*
+                * Someone passed in garbage, we give up.
+                */
                DEBUG_LOCKS_WARN_ON(1);
                return NULL;
        }
@@ -139,7 +150,13 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
 }
 
 #ifdef CONFIG_LOCK_STAT
-static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
+static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
+                     cpu_lock_stats);
+
+static inline u64 lockstat_clock(void)
+{
+       return local_clock();
+}
 
 static int lock_point(unsigned long points[], unsigned long ip)
 {
@@ -157,12 +174,12 @@ static int lock_point(unsigned long points[], unsigned long ip)
        return i;
 }
 
-static void lock_time_inc(struct lock_time *lt, s64 time)
+static void lock_time_inc(struct lock_time *lt, u64 time)
 {
        if (time > lt->max)
                lt->max = time;
 
-       if (time < lt->min || !lt->min)
+       if (time < lt->min || !lt->nr)
                lt->min = time;
 
        lt->total += time;
@@ -171,8 +188,15 @@ static void lock_time_inc(struct lock_time *lt, s64 time)
 
 static inline void lock_time_add(struct lock_time *src, struct lock_time *dst)
 {
-       dst->min += src->min;
-       dst->max += src->max;
+       if (!src->nr)
+               return;
+
+       if (src->max > dst->max)
+               dst->max = src->max;
+
+       if (src->min < dst->min || !dst->nr)
+               dst->min = src->min;
+
        dst->total += src->total;
        dst->nr += src->nr;
 }
@@ -185,7 +209,7 @@ struct lock_class_stats lock_stats(struct lock_class *class)
        memset(&stats, 0, sizeof(struct lock_class_stats));
        for_each_possible_cpu(cpu) {
                struct lock_class_stats *pcs =
-                       &per_cpu(lock_stats, cpu)[class - lock_classes];
+                       &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
 
                for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
                        stats.contention_point[i] += pcs->contention_point[i];
@@ -212,7 +236,7 @@ void clear_lock_stats(struct lock_class *class)
 
        for_each_possible_cpu(cpu) {
                struct lock_class_stats *cpu_stats =
-                       &per_cpu(lock_stats, cpu)[class - lock_classes];
+                       &per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
 
                memset(cpu_stats, 0, sizeof(struct lock_class_stats));
        }
@@ -222,23 +246,23 @@ void clear_lock_stats(struct lock_class *class)
 
 static struct lock_class_stats *get_lock_stats(struct lock_class *class)
 {
-       return &get_cpu_var(lock_stats)[class - lock_classes];
+       return &get_cpu_var(cpu_lock_stats)[class - lock_classes];
 }
 
 static void put_lock_stats(struct lock_class_stats *stats)
 {
-       put_cpu_var(lock_stats);
+       put_cpu_var(cpu_lock_stats);
 }
 
 static void lock_release_holdtime(struct held_lock *hlock)
 {
        struct lock_class_stats *stats;
-       s64 holdtime;
+       u64 holdtime;
 
        if (!lock_stat)
                return;
 
-       holdtime = sched_clock() - hlock->holdtime_stamp;
+       holdtime = lockstat_clock() - hlock->holdtime_stamp;
 
        stats = get_lock_stats(hlock_class(hlock));
        if (hlock->read)
@@ -356,6 +380,13 @@ static int verbose(struct lock_class *class)
 unsigned long nr_stack_trace_entries;
 static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
 
+static void print_lockdep_off(const char *bug_msg)
+{
+       printk(KERN_DEBUG "%s\n", bug_msg);
+       printk(KERN_DEBUG "turning off the locking correctness validator.\n");
+       printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n");
+}
+
 static int save_trace(struct stack_trace *trace)
 {
        trace->nr_entries = 0;
@@ -366,16 +397,26 @@ static int save_trace(struct stack_trace *trace)
 
        save_stack_trace(trace);
 
+       /*
+        * Some daft arches put -1 (ULONG_MAX) at the end to indicate it's a full trace.
+        *
+        * <rant> this is buggy anyway, since it takes a whole extra entry so a
+        * complete trace that maxes out the entries provided will be reported
+        * as incomplete, friggin useless </rant>
+        */
+       if (trace->nr_entries != 0 &&
+           trace->entries[trace->nr_entries-1] == ULONG_MAX)
+               trace->nr_entries--;
+
        trace->max_entries = trace->nr_entries;
 
        nr_stack_trace_entries += trace->nr_entries;
 
-       if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) {
+       if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) {
                if (!debug_locks_off_graph_unlock())
                        return 0;
 
-               printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
-               printk("turning off the locking correctness validator.\n");
+               print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
                dump_stack();
 
                return 0;
@@ -388,20 +429,6 @@ unsigned int nr_hardirq_chains;
 unsigned int nr_softirq_chains;
 unsigned int nr_process_chains;
 unsigned int max_lockdep_depth;
-unsigned int max_recursion_depth;
-
-static unsigned int lockdep_dependency_gen_id;
-
-static bool lockdep_dependency_visit(struct lock_class *source,
-                                    unsigned int depth)
-{
-       if (!depth)
-               lockdep_dependency_gen_id++;
-       if (source->dep_gen_id == lockdep_dependency_gen_id)
-               return true;
-       source->dep_gen_id = lockdep_dependency_gen_id;
-       return false;
-}
 
 #ifdef CONFIG_DEBUG_LOCKDEP
 /*
@@ -410,6 +437,7 @@ static bool lockdep_dependency_visit(struct lock_class *source,
  * about it later on, in lockdep_info().
  */
 static int lockdep_init_error;
+static const char *lock_init_error;
 static unsigned long lockdep_init_trace_data[20];
 static struct stack_trace lockdep_init_trace = {
        .max_entries = ARRAY_SIZE(lockdep_init_trace_data),
@@ -419,23 +447,7 @@ static struct stack_trace lockdep_init_trace = {
 /*
  * Various lockdep statistics:
  */
-atomic_t chain_lookup_hits;
-atomic_t chain_lookup_misses;
-atomic_t hardirqs_on_events;
-atomic_t hardirqs_off_events;
-atomic_t redundant_hardirqs_on;
-atomic_t redundant_hardirqs_off;
-atomic_t softirqs_on_events;
-atomic_t softirqs_off_events;
-atomic_t redundant_softirqs_on;
-atomic_t redundant_softirqs_off;
-atomic_t nr_unused_locks;
-atomic_t nr_cyclic_checks;
-atomic_t nr_cyclic_check_recursions;
-atomic_t nr_find_usage_forwards_checks;
-atomic_t nr_find_usage_forwards_recursions;
-atomic_t nr_find_usage_backwards_checks;
-atomic_t nr_find_usage_backwards_recursions;
+DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
 #endif
 
 /*
@@ -494,24 +506,32 @@ void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
        usage[i] = '\0';
 }
 
-static void print_lock_name(struct lock_class *class)
+static void __print_lock_name(struct lock_class *class)
 {
-       char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
+       char str[KSYM_NAME_LEN];
        const char *name;
 
-       get_usage_chars(class, usage);
-
        name = class->name;
        if (!name) {
                name = __get_key_name(class->key, str);
-               printk(" (%s", name);
+               printk("%s", name);
        } else {
-               printk(" (%s", name);
+               printk("%s", name);
                if (class->name_version > 1)
                        printk("#%d", class->name_version);
                if (class->subclass)
                        printk("/%d", class->subclass);
        }
+}
+
+static void print_lock_name(struct lock_class *class)
+{
+       char usage[LOCK_USAGE_CHARS];
+
+       get_usage_chars(class, usage);
+
+       printk(" (");
+       __print_lock_name(class);
        printk("){%s}", usage);
 }
 
@@ -551,93 +571,12 @@ static void lockdep_print_held_locks(struct task_struct *curr)
        }
 }
 
-static void print_lock_class_header(struct lock_class *class, int depth)
-{
-       int bit;
-
-       printk("%*s->", depth, "");
-       print_lock_name(class);
-       printk(" ops: %lu", class->ops);
-       printk(" {\n");
-
-       for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
-               if (class->usage_mask & (1 << bit)) {
-                       int len = depth;
-
-                       len += printk("%*s   %s", depth, "", usage_str[bit]);
-                       len += printk(" at:\n");
-                       print_stack_trace(class->usage_traces + bit, len);
-               }
-       }
-       printk("%*s }\n", depth, "");
-
-       printk("%*s ... key      at: ",depth,"");
-       print_ip_sym((unsigned long)class->key);
-}
-
-/*
- * printk the shortest lock dependencies from @start to @end in reverse order:
- */
-static void __used
-print_shortest_lock_dependencies(struct lock_list *leaf,
-                               struct lock_list *root)
+static void print_kernel_ident(void)
 {
-       struct lock_list *entry = leaf;
-       int depth;
-
-       /*compute depth from generated tree by BFS*/
-       depth = get_lock_depth(leaf);
-
-       do {
-               print_lock_class_header(entry->class, depth);
-               printk("%*s ... acquired at:\n", depth, "");
-               print_stack_trace(&entry->trace, 2);
-               printk("\n");
-
-               if (depth == 0 && (entry != root)) {
-                       printk("lockdep:%s bad BFS generated tree\n", __func__);
-                       break;
-               }
-
-               entry = get_lock_parent(entry);
-               depth--;
-       } while (entry && (depth >= 0));
-
-       return;
-}
-/*
- * printk all lock dependencies starting at <entry>:
- */
-static void __used
-print_lock_dependencies(struct lock_class *class, int depth)
-{
-       struct lock_list *entry;
-
-       if (lockdep_dependency_visit(class, depth))
-               return;
-
-       if (DEBUG_LOCKS_WARN_ON(depth >= 20))
-               return;
-
-       print_lock_class_header(class, depth);
-
-       list_for_each_entry(entry, &class->locks_after, entry) {
-               if (DEBUG_LOCKS_WARN_ON(!entry->class))
-                       return;
-
-               print_lock_dependencies(entry->class, depth + 1);
-
-               printk("%*s ... acquired at:\n",depth,"");
-               print_stack_trace(&entry->trace, 2);
-               printk("\n");
-       }
-}
-
-static void print_kernel_version(void)
-{
-       printk("%s %.*s\n", init_utsname()->release,
+       printk("%s %.*s %s\n", init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
-               init_utsname()->version);
+               init_utsname()->version,
+               print_tainted());
 }
 
 static int very_verbose(struct lock_class *class)
@@ -656,9 +595,6 @@ static int static_obj(void *obj)
        unsigned long start = (unsigned long) &_stext,
                      end   = (unsigned long) &_end,
                      addr  = (unsigned long) obj;
-#ifdef CONFIG_SMP
-       int i;
-#endif
 
        /*
         * static variable?
@@ -666,24 +602,19 @@ static int static_obj(void *obj)
        if ((addr >= start) && (addr < end))
                return 1;
 
-#ifdef CONFIG_SMP
+       if (arch_is_kernel_data(addr))
+               return 1;
+
        /*
-        * percpu var?
+        * in-kernel percpu var?
         */
-       for_each_possible_cpu(i) {
-               start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
-               end   = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM
-                                       + per_cpu_offset(i);
-
-               if ((addr >= start) && (addr < end))
-                       return 1;
-       }
-#endif
+       if (is_kernel_percpu_address(addr))
+               return 1;
 
        /*
-        * module var?
+        * module static or percpu var?
         */
-       return is_module_address(addr);
+       return is_module_address(addr) || is_module_percpu_address(addr);
 }
 
 /*
@@ -729,10 +660,21 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
        if (unlikely(!lockdep_initialized)) {
                lockdep_init();
                lockdep_init_error = 1;
+               lock_init_error = lock->name;
                save_stack_trace(&lockdep_init_trace);
        }
 #endif
 
+       if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
+               debug_locks_off();
+               printk(KERN_ERR
+                       "BUG: looking up invalid subclass: %u\n", subclass);
+               printk(KERN_ERR
+                       "turning off the locking correctness validator.\n");
+               dump_stack();
+               return NULL;
+       }
+
        /*
         * Static locks do not have their class-keys yet - for them the key
         * is the lock object itself:
@@ -759,6 +701,10 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
         */
        list_for_each_entry(class, hash_head, hash_entry) {
                if (class->key == key) {
+                       /*
+                        * Huh! same key, different name? Did someone trample
+                        * on some memory? We're most confused.
+                        */
                        WARN_ON_ONCE(class->name != lock->name);
                        return class;
                }
@@ -782,7 +728,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 
        class = look_up_lock_class(lock, subclass);
        if (likely(class))
-               return class;
+               goto out_set_class_cache;
 
        /*
         * Debug-check: all keys must be persistent!
@@ -823,13 +769,12 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
                }
                raw_local_irq_restore(flags);
 
-               printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
-               printk("turning off the locking correctness validator.\n");
+               print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
                dump_stack();
                return NULL;
        }
        class = lock_classes + nr_lock_classes++;
-       debug_atomic_inc(&nr_unused_locks);
+       debug_atomic_inc(nr_unused_locks);
        class->key = key;
        class->name = lock->name;
        class->subclass = subclass;
@@ -867,9 +812,16 @@ out_unlock_set:
        graph_unlock();
        raw_local_irq_restore(flags);
 
+out_set_class_cache:
        if (!subclass || force)
-               lock->class_cache = class;
+               lock->class_cache[0] = class;
+       else if (subclass < NR_LOCKDEP_CACHING_CLASSES)
+               lock->class_cache[subclass] = class;
 
+       /*
+        * Hash collision, did we smoke some? We found a class with a matching
+        * hash but the subclass -- which is hashed in -- didn't match.
+        */
        if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass))
                return NULL;
 
@@ -887,8 +839,7 @@ static struct lock_list *alloc_list_entry(void)
                if (!debug_locks_off_graph_unlock())
                        return NULL;
 
-               printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
-               printk("turning off the locking correctness validator.\n");
+               print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!");
                dump_stack();
                return NULL;
        }
@@ -899,7 +850,8 @@ static struct lock_list *alloc_list_entry(void)
  * Add a new dependency to the head of the list:
  */
 static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
-                           struct list_head *head, unsigned long ip, int distance)
+                           struct list_head *head, unsigned long ip,
+                           int distance, struct stack_trace *trace)
 {
        struct lock_list *entry;
        /*
@@ -910,11 +862,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
        if (!entry)
                return 0;
 
-       if (!save_trace(&entry->trace))
-               return 0;
-
        entry->class = this;
        entry->distance = distance;
+       entry->trace = *trace;
        /*
         * Since we never remove from the dependency list, the list can
         * be walked lockless by other CPUs, it's only allocation
@@ -927,14 +877,112 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
        return 1;
 }
 
-unsigned long bfs_accessed[BITS_TO_LONGS(MAX_LOCKDEP_ENTRIES)];
-static struct circular_queue  lock_cq;
+/*
+ * The queue size is a power of 2 so that wrap-around is a cheap mask (CQ_MASK)
+ */
+#define MAX_CIRCULAR_QUEUE_SIZE                4096UL
+#define CQ_MASK                                (MAX_CIRCULAR_QUEUE_SIZE-1)
+
+/*
+ * The circular_queue and its helpers are used to implement the
+ * breadth-first search (BFS) algorithm, by which we can build
+ * the shortest path from the next lock to be acquired to the
+ * previously held lock if there is a circular dependency between them.
+ */
+struct circular_queue {
+       unsigned long element[MAX_CIRCULAR_QUEUE_SIZE];
+       unsigned int  front, rear;      /* dequeue at front, enqueue at rear */
+};
+
+static struct circular_queue lock_cq;
+
+unsigned int max_bfs_queue_depth;
+
+static unsigned int lockdep_dependency_gen_id;
+
+static inline void __cq_init(struct circular_queue *cq)
+{
+       cq->front = cq->rear = 0;       /* front == rear is the empty state */
+       lockdep_dependency_gen_id++;    /* dep_gen_id marks from earlier searches stop matching */
+}
+
+static inline int __cq_empty(struct circular_queue *cq)
+{
+       return (cq->front == cq->rear);
+}
+
+static inline int __cq_full(struct circular_queue *cq)
+{
+       return ((cq->rear + 1) & CQ_MASK) == cq->front; /* one slot stays unused, so full != empty */
+}
+
+static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem)
+{
+       if (__cq_full(cq))
+               return -1;      /* no room: @elem is not stored */
+
+       cq->element[cq->rear] = elem;
+       cq->rear = (cq->rear + 1) & CQ_MASK;    /* advance with wrap-around */
+       return 0;       /* stored */
+}
+
+static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem)
+{
+       if (__cq_empty(cq))
+               return -1;      /* nothing queued: *elem is left untouched */
+
+       *elem = cq->element[cq->front];
+       cq->front = (cq->front + 1) & CQ_MASK;  /* advance with wrap-around */
+       return 0;       /* *elem holds the oldest queued value */
+}
+
+static inline unsigned int  __cq_get_elem_count(struct circular_queue *cq)
+{
+       return (cq->rear - cq->front) & CQ_MASK;        /* mask keeps the count right after rear wraps below front */
+}
+
+static inline void mark_lock_accessed(struct lock_list *lock,
+                                       struct lock_list *parent)
+{
+       unsigned long nr;
+
+       nr = lock - list_entries;       /* index of @lock within list_entries[]; only used for the check below */
+       WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */
+       lock->parent = parent;          /* predecessor link, read back by get_lock_parent() */
+       lock->class->dep_gen_id = lockdep_dependency_gen_id;    /* stamp the class with the current generation */
+}
+
+static inline unsigned long lock_accessed(struct lock_list *lock)
+{
+       unsigned long nr;
+
+       nr = lock - list_entries;       /* index of @lock within list_entries[]; only used for the check below */
+       WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */
+       return lock->class->dep_gen_id == lockdep_dependency_gen_id;    /* non-zero if the class carries the current generation stamp */
+}
+
+static inline struct lock_list *get_lock_parent(struct lock_list *child)
+{
+       return child->parent;
+}
+
+static inline int get_lock_depth(struct lock_list *child)
+{
+       int depth = 0;
+       struct lock_list *parent;
+
+       while ((parent = get_lock_parent(child))) {     /* climb until an entry has a NULL parent */
+               child = parent;
+               depth++;
+       }
+       return depth;   /* number of parent links followed from @child */
+}
 
 static int __bfs(struct lock_list *source_entry,
-                       void *data,
-                       int (*match)(struct lock_list *entry, void *data),
-                       struct lock_list **target_entry,
-                       int forward)
+                void *data,
+                int (*match)(struct lock_list *entry, void *data),
+                struct lock_list **target_entry,
+                int forward)
 {
        struct lock_list *entry;
        struct list_head *head;
@@ -975,6 +1023,7 @@ static int __bfs(struct lock_list *source_entry,
 
                list_for_each_entry(entry, head, entry) {
                        if (!lock_accessed(entry)) {
+                               unsigned int cq_depth;
                                mark_lock_accessed(entry, lock);
                                if (match(entry, data)) {
                                        *target_entry = entry;
@@ -986,6 +1035,9 @@ static int __bfs(struct lock_list *source_entry,
                                        ret = -1;
                                        goto exit;
                                }
+                               cq_depth = __cq_get_elem_count(cq);
+                               if (max_bfs_queue_depth < cq_depth)
+                                       max_bfs_queue_depth = cq_depth;
                        }
                }
        }
@@ -1034,6 +1086,56 @@ print_circular_bug_entry(struct lock_list *target, int depth)
        return 0;
 }
 
+static void
+print_circular_lock_scenario(struct held_lock *src,
+                            struct held_lock *tgt,
+                            struct lock_list *prt)
+{
+       struct lock_class *source = hlock_class(src);
+       struct lock_class *target = hlock_class(tgt);
+       struct lock_class *parent = prt->class;
+
+       /*
+        * A direct locking problem is one where the parent class is
+        * the source class itself; then all we need to show is the
+        * deadlock scenario, as the order in which the two locks
+        * are taken is obvious.
+        *
+        * But if there is a chain instead, going through an
+        * intermediate lock (parent) that is not the same class as
+        * source, then the lock chain is printed first to describe
+        * the problem. Otherwise we would need to show a different
+        * CPU case for each link in the chain from the source lock
+        * to the target lock.
+        */
+       if (parent != source) {
+               printk("Chain exists of:\n  ");
+               __print_lock_name(source);
+               printk(" --> ");
+               __print_lock_name(parent);
+               printk(" --> ");
+               __print_lock_name(target);
+               printk("\n\n");
+       }
+
+       printk(" Possible unsafe locking scenario:\n\n");
+       printk("       CPU0                    CPU1\n");
+       printk("       ----                    ----\n");
+       printk("  lock(");
+       __print_lock_name(target);
+       printk(");\n");
+       printk("                               lock(");
+       __print_lock_name(parent);
+       printk(");\n");
+       printk("                               lock(");
+       __print_lock_name(target);
+       printk(");\n");
+       printk("  lock(");
+       __print_lock_name(source);
+       printk(");\n");
+       printk("\n *** DEADLOCK ***\n\n");
+}
+
 /*
  * When a circular dependency is detected, print the
  * header first:
@@ -1048,10 +1150,11 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
        if (debug_locks_silent)
                return 0;
 
-       printk("\n=======================================================\n");
-       printk(  "[ INFO: possible circular locking dependency detected ]\n");
-       print_kernel_version();
-       printk(  "-------------------------------------------------------\n");
+       printk("\n");
+       printk("======================================================\n");
+       printk("[ INFO: possible circular locking dependency detected ]\n");
+       print_kernel_ident();
+       printk("-------------------------------------------------------\n");
        printk("%s/%d is trying to acquire lock:\n",
                curr->comm, task_pid_nr(curr));
        print_lock(check_src);
@@ -1077,6 +1180,7 @@ static noinline int print_circular_bug(struct lock_list *this,
 {
        struct task_struct *curr = current;
        struct lock_list *parent;
+       struct lock_list *first_parent;
        int depth;
 
        if (!debug_locks_off_graph_unlock() || debug_locks_silent)
@@ -1090,6 +1194,7 @@ static noinline int print_circular_bug(struct lock_list *this,
        print_circular_bug_header(target, depth, check_src, check_tgt);
 
        parent = get_lock_parent(target);
+       first_parent = parent;
 
        while (parent) {
                print_circular_bug_entry(parent, --depth);
@@ -1097,6 +1202,9 @@ static noinline int print_circular_bug(struct lock_list *this,
        }
 
        printk("\nother info that might help us debug this:\n\n");
+       print_circular_lock_scenario(check_src, check_tgt,
+                                    first_parent);
+
        lockdep_print_held_locks(curr);
 
        printk("\nstack backtrace:\n");
@@ -1110,67 +1218,68 @@ static noinline int print_bfs_bug(int ret)
        if (!debug_locks_off_graph_unlock())
                return 0;
 
+       /*
+        * Breadth-first-search failed, graph got corrupted?
+        */
        WARN(1, "lockdep bfs error:%d\n", ret);
 
        return 0;
 }
 
-unsigned long __lockdep_count_forward_deps(struct lock_class *class,
-                                          unsigned int depth)
+static int noop_count(struct lock_list *entry, void *data)
 {
-       struct lock_list *entry;
-       unsigned long ret = 1;
+       (*(unsigned long *)data)++;
+       return 0;
+}
 
-       if (lockdep_dependency_visit(class, depth))
-               return 0;
+unsigned long __lockdep_count_forward_deps(struct lock_list *this)
+{
+       unsigned long  count = 0;
+       struct lock_list *uninitialized_var(target_entry);
 
-       /*
-        * Recurse this class's dependency list:
-        */
-       list_for_each_entry(entry, &class->locks_after, entry)
-               ret += __lockdep_count_forward_deps(entry->class, depth + 1);
+       __bfs_forwards(this, (void *)&count, noop_count, &target_entry);
 
-       return ret;
+       return count;
 }
-
 unsigned long lockdep_count_forward_deps(struct lock_class *class)
 {
        unsigned long ret, flags;
+       struct lock_list this;
+
+       this.parent = NULL;
+       this.class = class;
 
        local_irq_save(flags);
-       __raw_spin_lock(&lockdep_lock);
-       ret = __lockdep_count_forward_deps(class, 0);
-       __raw_spin_unlock(&lockdep_lock);
+       arch_spin_lock(&lockdep_lock);
+       ret = __lockdep_count_forward_deps(&this);
+       arch_spin_unlock(&lockdep_lock);
        local_irq_restore(flags);
 
        return ret;
 }
 
-unsigned long __lockdep_count_backward_deps(struct lock_class *class,
-                                           unsigned int depth)
+unsigned long __lockdep_count_backward_deps(struct lock_list *this)
 {
-       struct lock_list *entry;
-       unsigned long ret = 1;
+       unsigned long  count = 0;
+       struct lock_list *uninitialized_var(target_entry);
 
-       if (lockdep_dependency_visit(class, depth))
-               return 0;
-       /*
-        * Recurse this class's dependency list:
-        */
-       list_for_each_entry(entry, &class->locks_before, entry)
-               ret += __lockdep_count_backward_deps(entry->class, depth + 1);
+       __bfs_backwards(this, (void *)&count, noop_count, &target_entry);
 
-       return ret;
+       return count;
 }
 
 unsigned long lockdep_count_backward_deps(struct lock_class *class)
 {
        unsigned long ret, flags;
+       struct lock_list this;
+
+       this.parent = NULL;
+       this.class = class;
 
        local_irq_save(flags);
-       __raw_spin_lock(&lockdep_lock);
-       ret = __lockdep_count_backward_deps(class, 0);
-       __raw_spin_unlock(&lockdep_lock);
+       arch_spin_lock(&lockdep_lock);
+       ret = __lockdep_count_backward_deps(&this);
+       arch_spin_unlock(&lockdep_lock);
        local_irq_restore(flags);
 
        return ret;
@@ -1186,7 +1295,7 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
 {
        int result;
 
-       debug_atomic_inc(&nr_cyclic_checks);
+       debug_atomic_inc(nr_cyclic_checks);
 
        result = __bfs_forwards(root, target, class_equal, target_entry);
 
@@ -1200,14 +1309,6 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
  * without creating any illegal irq-safe -> irq-unsafe lock dependency.
  */
 
-
-#define   BFS_PROCESS_RET(ret) do { \
-                                       if (ret < 0) \
-                                               return print_bfs_bug(ret); \
-                                       if (ret == 1) \
-                                               return 1; \
-                               } while (0)
-
 static inline int usage_match(struct lock_list *entry, void *bit)
 {
        return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit);
@@ -1231,7 +1332,7 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
 {
        int result;
 
-       debug_atomic_inc(&nr_find_usage_forwards_checks);
+       debug_atomic_inc(nr_find_usage_forwards_checks);
 
        result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
 
@@ -1254,13 +1355,123 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
 {
        int result;
 
-       debug_atomic_inc(&nr_find_usage_backwards_checks);
+       debug_atomic_inc(nr_find_usage_backwards_checks);
 
        result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
 
        return result;
 }
 
+static void print_lock_class_header(struct lock_class *class, int depth)
+{
+       int bit;
+
+       printk("%*s->", depth, "");
+       print_lock_name(class);
+       printk(" ops: %lu", class->ops);
+       printk(" {\n");
+
+       for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
+               if (class->usage_mask & (1 << bit)) {
+                       int len = depth;
+
+                       len += printk("%*s   %s", depth, "", usage_str[bit]);
+                       len += printk(" at:\n");
+                       print_stack_trace(class->usage_traces + bit, len);
+               }
+       }
+       printk("%*s }\n", depth, "");
+
+       printk("%*s ... key      at: ",depth,"");
+       print_ip_sym((unsigned long)class->key);
+}
+
+/*
+ * printk the shortest lock dependencies from @root to @leaf in reverse order:
+ */
+static void __used
+print_shortest_lock_dependencies(struct lock_list *leaf,
+                               struct lock_list *root)
+{
+       struct lock_list *entry = leaf;
+       int depth;
+
+       /* compute depth from the tree generated by BFS */
+       depth = get_lock_depth(leaf);
+
+       do {
+               print_lock_class_header(entry->class, depth);
+               printk("%*s ... acquired at:\n", depth, "");
+               print_stack_trace(&entry->trace, 2);
+               printk("\n");
+
+               if (depth == 0 && (entry != root)) {
+                       printk("lockdep:%s bad path found in chain graph\n", __func__);
+                       break;
+               }
+
+               entry = get_lock_parent(entry);
+               depth--;
+       } while (entry && (depth >= 0));
+
+       return;
+}
+
+static void
+print_irq_lock_scenario(struct lock_list *safe_entry,
+                       struct lock_list *unsafe_entry,
+                       struct lock_class *prev_class,
+                       struct lock_class *next_class)
+{
+       struct lock_class *safe_class = safe_entry->class;
+       struct lock_class *unsafe_class = unsafe_entry->class;
+       struct lock_class *middle_class = prev_class;
+
+       if (middle_class == safe_class)
+               middle_class = next_class;
+
+       /*
+        * A direct locking problem where unsafe_class lock is taken
+        * directly by safe_class lock, then all we need to show
+        * is the deadlock scenario, as it is obvious that the
+        * unsafe lock is taken under the safe lock.
+        *
+        * But if there is a chain instead, where the safe lock takes
+        * an intermediate lock (middle_class) where this lock is
+        * not the same as the safe lock, then the lock chain is
+        * used to describe the problem. Otherwise we would need
+        * to show a different CPU case for each link in the chain
+        * from the safe_class lock to the unsafe_class lock.
+        */
+       if (middle_class != unsafe_class) {
+               printk("Chain exists of:\n  ");
+               __print_lock_name(safe_class);
+               printk(" --> ");
+               __print_lock_name(middle_class);
+               printk(" --> ");
+               __print_lock_name(unsafe_class);
+               printk("\n\n");
+       }
+
+       printk(" Possible interrupt unsafe locking scenario:\n\n");
+       printk("       CPU0                    CPU1\n");
+       printk("       ----                    ----\n");
+       printk("  lock(");
+       __print_lock_name(unsafe_class);
+       printk(");\n");
+       printk("                               local_irq_disable();\n");
+       printk("                               lock(");
+       __print_lock_name(safe_class);
+       printk(");\n");
+       printk("                               lock(");
+       __print_lock_name(middle_class);
+       printk(");\n");
+       printk("  <Interrupt>\n");
+       printk("    lock(");
+       __print_lock_name(safe_class);
+       printk(");\n");
+       printk("\n *** DEADLOCK ***\n\n");
+}
 
 static int
 print_bad_irq_dependency(struct task_struct *curr,
@@ -1277,11 +1488,12 @@ print_bad_irq_dependency(struct task_struct *curr,
        if (!debug_locks_off_graph_unlock() || debug_locks_silent)
                return 0;
 
-       printk("\n======================================================\n");
-       printk(  "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
+       printk("\n");
+       printk("======================================================\n");
+       printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
                irqclass, irqclass);
-       print_kernel_version();
-       printk(  "------------------------------------------------------\n");
+       print_kernel_ident();
+       printk("------------------------------------------------------\n");
        printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
                curr->comm, task_pid_nr(curr),
                curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
@@ -1313,6 +1525,9 @@ print_bad_irq_dependency(struct task_struct *curr,
        print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);
 
        printk("\nother info that might help us debug this:\n\n");
+       print_irq_lock_scenario(backwards_entry, forwards_entry,
+                               hlock_class(prev), hlock_class(next));
+
        lockdep_print_held_locks(curr);
 
        printk("\nthe dependencies between %s-irq-safe lock", irqclass);
@@ -1347,12 +1562,18 @@ check_usage(struct task_struct *curr, struct held_lock *prev,
 
        this.class = hlock_class(prev);
        ret = find_usage_backwards(&this, bit_backwards, &target_entry);
-       BFS_PROCESS_RET(ret);
+       if (ret < 0)
+               return print_bfs_bug(ret);
+       if (ret == 1)
+               return ret;
 
        that.parent = NULL;
        that.class = hlock_class(next);
        ret = find_usage_forwards(&that, bit_forwards, &target_entry1);
-       BFS_PROCESS_RET(ret);
+       if (ret < 0)
+               return print_bfs_bug(ret);
+       if (ret == 1)
+               return ret;
 
        return print_bad_irq_dependency(curr, &this, &that,
                        target_entry, target_entry1,
@@ -1470,6 +1691,26 @@ static inline void inc_chains(void)
 
 #endif
 
+static void
+print_deadlock_scenario(struct held_lock *nxt,
+                            struct held_lock *prv)
+{
+       struct lock_class *next = hlock_class(nxt);
+       struct lock_class *prev = hlock_class(prv);
+
+       printk(" Possible unsafe locking scenario:\n\n");
+       printk("       CPU0\n");
+       printk("       ----\n");
+       printk("  lock(");
+       __print_lock_name(prev);
+       printk(");\n");
+       printk("  lock(");
+       __print_lock_name(next);
+       printk(");\n");
+       printk("\n *** DEADLOCK ***\n\n");
+       printk(" May be due to missing lock nesting notation\n\n");
+}
+
 static int
 print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
                   struct held_lock *next)
@@ -1477,10 +1718,11 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
        if (!debug_locks_off_graph_unlock() || debug_locks_silent)
                return 0;
 
-       printk("\n=============================================\n");
-       printk(  "[ INFO: possible recursive locking detected ]\n");
-       print_kernel_version();
-       printk(  "---------------------------------------------\n");
+       printk("\n");
+       printk("=============================================\n");
+       printk("[ INFO: possible recursive locking detected ]\n");
+       print_kernel_ident();
+       printk("---------------------------------------------\n");
        printk("%s/%d is trying to acquire lock:\n",
                curr->comm, task_pid_nr(curr));
        print_lock(next);
@@ -1488,6 +1730,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
        print_lock(prev);
 
        printk("\nother info that might help us debug this:\n");
+       print_deadlock_scenario(next, prev);
        lockdep_print_held_locks(curr);
 
        printk("\nstack backtrace:\n");
@@ -1564,12 +1807,20 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
  */
 static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
-              struct held_lock *next, int distance)
+              struct held_lock *next, int distance, int trylock_loop)
 {
        struct lock_list *entry;
        int ret;
        struct lock_list this;
        struct lock_list *uninitialized_var(target_entry);
+       /*
+        * Static variable, serialized by the graph_lock().
+        *
+        * We use this static variable to save the stack trace in case
+        * we call into this function multiple times due to encountering
+        * trylocks in the held lock stack.
+        */
+       static struct stack_trace trace;
 
        /*
         * Prove that the new <prev> -> <next> dependency would not
@@ -1617,20 +1868,23 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
                }
        }
 
+       if (!trylock_loop && !save_trace(&trace))
+               return 0;
+
        /*
         * Ok, all validations passed, add the new lock
         * to the previous lock's dependency list:
         */
        ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
                               &hlock_class(prev)->locks_after,
-                              next->acquire_ip, distance);
+                              next->acquire_ip, distance, &trace);
 
        if (!ret)
                return 0;
 
        ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
                               &hlock_class(next)->locks_before,
-                              next->acquire_ip, distance);
+                              next->acquire_ip, distance, &trace);
        if (!ret)
                return 0;
 
@@ -1660,6 +1914,7 @@ static int
 check_prevs_add(struct task_struct *curr, struct held_lock *next)
 {
        int depth = curr->lockdep_depth;
+       int trylock_loop = 0;
        struct held_lock *hlock;
 
        /*
@@ -1685,7 +1940,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
                 * added:
                 */
                if (hlock->read != 2) {
-                       if (!check_prev_add(curr, hlock, next, distance))
+                       if (!check_prev_add(curr, hlock, next,
+                                               distance, trylock_loop))
                                return 0;
                        /*
                         * Stop after the first non-trylock entry,
@@ -1708,12 +1964,18 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
                if (curr->held_locks[depth].irq_context !=
                                curr->held_locks[depth-1].irq_context)
                        break;
+               trylock_loop = 1;
        }
        return 1;
 out_bug:
        if (!debug_locks_off_graph_unlock())
                return 0;
 
+       /*
+        * Clearly we all shouldn't be here, but since we made it we
+        * can reliably say we messed up our state. See the above two
+        * gotos for reasons why we could possibly end up here.
+        */
        WARN_ON(1);
 
        return 0;
@@ -1742,9 +2004,14 @@ static inline int lookup_chain_cache(struct task_struct *curr,
        struct lock_class *class = hlock_class(hlock);
        struct list_head *hash_head = chainhashentry(chain_key);
        struct lock_chain *chain;
-       struct held_lock *hlock_curr, *hlock_next;
-       int i, j, n, cn;
+       struct held_lock *hlock_curr;
+       int i, j;
 
+       /*
+        * We might need to take the graph lock, ensure we've got IRQs
+        * disabled to make this an IRQ-safe lock.. for recursion reasons
+        * lockdep won't complain about its own locking errors.
+        */
        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return 0;
        /*
@@ -1754,7 +2021,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
        list_for_each_entry(chain, hash_head, entry) {
                if (chain->chain_key == chain_key) {
 cache_hit:
-                       debug_atomic_inc(&chain_lookup_hits);
+                       debug_atomic_inc(chain_lookup_hits);
                        if (very_verbose(class))
                                printk("\nhash chain already cached, key: "
                                        "%016Lx tail class: [%p] %s\n",
@@ -1785,8 +2052,7 @@ cache_hit:
                if (!debug_locks_off_graph_unlock())
                        return 0;
 
-               printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
-               printk("turning off the locking correctness validator.\n");
+               print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
                dump_stack();
                return 0;
        }
@@ -1794,24 +2060,16 @@ cache_hit:
        chain->chain_key = chain_key;
        chain->irq_context = hlock->irq_context;
        /* Find the first held_lock of current chain */
-       hlock_next = hlock;
        for (i = curr->lockdep_depth - 1; i >= 0; i--) {
                hlock_curr = curr->held_locks + i;
-               if (hlock_curr->irq_context != hlock_next->irq_context)
+               if (hlock_curr->irq_context != hlock->irq_context)
                        break;
-               hlock_next = hlock;
        }
        i++;
        chain->depth = curr->lockdep_depth + 1 - i;
-       cn = nr_chain_hlocks;
-       while (cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS) {
-               n = cmpxchg(&nr_chain_hlocks, cn, cn + chain->depth);
-               if (n == cn)
-                       break;
-               cn = n;
-       }
-       if (likely(cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
-               chain->base = cn;
+       if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
+               chain->base = nr_chain_hlocks;
+               nr_chain_hlocks += chain->depth;
                for (j = 0; j < chain->depth - 1; j++, i++) {
                        int lock_id = curr->held_locks[i].class_idx - 1;
                        chain_hlocks[chain->base + j] = lock_id;
@@ -1819,7 +2077,7 @@ cache_hit:
                chain_hlocks[chain->base + j] = class - lock_classes;
        }
        list_add_tail_rcu(&chain->entry, hash_head);
-       debug_atomic_inc(&chain_lookup_misses);
+       debug_atomic_inc(chain_lookup_misses);
        inc_chains();
 
        return 1;
@@ -1902,6 +2160,10 @@ static void check_chain_key(struct task_struct *curr)
                hlock = curr->held_locks + i;
                if (chain_key != hlock->prev_chain_key) {
                        debug_locks_off();
+                       /*
+                        * We got mighty confused, our chain keys don't match
+                        * with what we expect; did someone trample on our task state?
+                        */
                        WARN(1, "hm#1, depth: %u [%u], %016Lx != %016Lx\n",
                                curr->lockdep_depth, i,
                                (unsigned long long)chain_key,
@@ -1909,6 +2171,9 @@ static void check_chain_key(struct task_struct *curr)
                        return;
                }
                id = hlock->class_idx - 1;
+               /*
+                * Whoops ran out of static storage again?
+                */
                if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
                        return;
 
@@ -1920,6 +2185,10 @@ static void check_chain_key(struct task_struct *curr)
        }
        if (chain_key != curr->curr_chain_key) {
                debug_locks_off();
+               /*
+                * More smoking hash instead of calculating it, damn see these
+                * numbers float.. I bet that a pink elephant stepped on my memory.
+                */
                WARN(1, "hm#2, depth: %u [%u], %016Lx != %016Lx\n",
                        curr->lockdep_depth, i,
                        (unsigned long long)chain_key,
@@ -1928,6 +2197,24 @@ static void check_chain_key(struct task_struct *curr)
 #endif
 }
 
+static void
+print_usage_bug_scenario(struct held_lock *lock)
+{
+       struct lock_class *class = hlock_class(lock);
+
+       printk(" Possible unsafe locking scenario:\n\n");
+       printk("       CPU0\n");
+       printk("       ----\n");
+       printk("  lock(");
+       __print_lock_name(class);
+       printk(");\n");
+       printk("  <Interrupt>\n");
+       printk("    lock(");
+       __print_lock_name(class);
+       printk(");\n");
+       printk("\n *** DEADLOCK ***\n\n");
+}
+
 static int
 print_usage_bug(struct task_struct *curr, struct held_lock *this,
                enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
@@ -1935,10 +2222,11 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
        if (!debug_locks_off_graph_unlock() || debug_locks_silent)
                return 0;
 
-       printk("\n=================================\n");
-       printk(  "[ INFO: inconsistent lock state ]\n");
-       print_kernel_version();
-       printk(  "---------------------------------\n");
+       printk("\n");
+       printk("=================================\n");
+       printk("[ INFO: inconsistent lock state ]\n");
+       print_kernel_ident();
+       printk("---------------------------------\n");
 
        printk("inconsistent {%s} -> {%s} usage.\n",
                usage_str[prev_bit], usage_str[new_bit]);
@@ -1956,6 +2244,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
 
        print_irqtrace_events(curr);
        printk("\nother info that might help us debug this:\n");
+       print_usage_bug_scenario(this);
+
        lockdep_print_held_locks(curr);
 
        printk("\nstack backtrace:\n");
@@ -1990,13 +2280,18 @@ print_irq_inversion_bug(struct task_struct *curr,
                        struct held_lock *this, int forwards,
                        const char *irqclass)
 {
+       struct lock_list *entry = other;
+       struct lock_list *middle = NULL;
+       int depth;
+
        if (!debug_locks_off_graph_unlock() || debug_locks_silent)
                return 0;
 
-       printk("\n=========================================================\n");
-       printk(  "[ INFO: possible irq lock inversion dependency detected ]\n");
-       print_kernel_version();
-       printk(  "---------------------------------------------------------\n");
+       printk("\n");
+       printk("=========================================================\n");
+       printk("[ INFO: possible irq lock inversion dependency detected ]\n");
+       print_kernel_ident();
+       printk("---------------------------------------------------------\n");
        printk("%s/%d just changed the state of lock:\n",
                curr->comm, task_pid_nr(curr));
        print_lock(this);
@@ -2008,6 +2303,25 @@ print_irq_inversion_bug(struct task_struct *curr,
        printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
 
        printk("\nother info that might help us debug this:\n");
+
+       /* Find a middle lock (if one exists) */
+       depth = get_lock_depth(other);
+       do {
+               if (depth == 0 && (entry != root)) {
+                       printk("lockdep:%s bad path found in chain graph\n", __func__);
+                       break;
+               }
+               middle = entry;
+               entry = get_lock_parent(entry);
+               depth--;
+       } while (entry && entry != root && (depth >= 0));
+       if (forwards)
+               print_irq_lock_scenario(root, other,
+                       middle ? middle->class : root->class, other->class);
+       else
+               print_irq_lock_scenario(other, root,
+                       middle ? middle->class : other->class, root->class);
+
        lockdep_print_held_locks(curr);
 
        printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
@@ -2036,7 +2350,10 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
        root.parent = NULL;
        root.class = hlock_class(this);
        ret = find_usage_forwards(&root, bit, &target_entry);
-       BFS_PROCESS_RET(ret);
+       if (ret < 0)
+               return print_bfs_bug(ret);
+       if (ret == 1)
+               return ret;
 
        return print_irq_inversion_bug(curr, &root, target_entry,
                                        this, 1, irqclass);
@@ -2057,10 +2374,13 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
        root.parent = NULL;
        root.class = hlock_class(this);
        ret = find_usage_backwards(&root, bit, &target_entry);
-       BFS_PROCESS_RET(ret);
+       if (ret < 0)
+               return print_bfs_bug(ret);
+       if (ret == 1)
+               return ret;
 
        return print_irq_inversion_bug(curr, &root, target_entry,
-                                       this, 1, irqclass);
+                                       this, 0, irqclass);
 }
 
 void print_irqtrace_events(struct task_struct *curr)
@@ -2195,6 +2515,9 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark)
 
                BUG_ON(usage_bit >= LOCK_USAGE_STATES);
 
+               if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys)
+                       continue;
+
                if (!mark_lock(curr, hlock, usage_bit))
                        return 0;
        }
@@ -2203,47 +2526,15 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark)
 }
 
 /*
- * Debugging helper: via this flag we know that we are in
- * 'early bootup code', and will warn about any invalid irqs-on event:
- */
-static int early_boot_irqs_enabled;
-
-void early_boot_irqs_off(void)
-{
-       early_boot_irqs_enabled = 0;
-}
-
-void early_boot_irqs_on(void)
-{
-       early_boot_irqs_enabled = 1;
-}
-
-/*
  * Hardirqs will be enabled:
  */
-void trace_hardirqs_on_caller(unsigned long ip)
+static void __trace_hardirqs_on_caller(unsigned long ip)
 {
        struct task_struct *curr = current;
 
-       time_hardirqs_on(CALLER_ADDR0, ip);
-
-       if (unlikely(!debug_locks || current->lockdep_recursion))
-               return;
-
-       if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled)))
-               return;
-
-       if (unlikely(curr->hardirqs_enabled)) {
-               debug_atomic_inc(&redundant_hardirqs_on);
-               return;
-       }
        /* we'll do an OFF -> ON transition: */
        curr->hardirqs_enabled = 1;
 
-       if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
-               return;
-       if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
-               return;
        /*
         * We are going to turn hardirqs on, so set the
         * usage bit for all held locks:
@@ -2261,7 +2552,50 @@ void trace_hardirqs_on_caller(unsigned long ip)
 
        curr->hardirq_enable_ip = ip;
        curr->hardirq_enable_event = ++curr->irq_events;
-       debug_atomic_inc(&hardirqs_on_events);
+       debug_atomic_inc(hardirqs_on_events);
+}
+
+void trace_hardirqs_on_caller(unsigned long ip)
+{
+       time_hardirqs_on(CALLER_ADDR0, ip);
+
+       if (unlikely(!debug_locks || current->lockdep_recursion))
+               return;
+
+       if (unlikely(current->hardirqs_enabled)) {
+               /*
+                * Neither irq nor preemption are disabled here
+                * so this is racy by nature but losing one hit
+                * in a stat is not a big deal.
+                */
+               __debug_atomic_inc(redundant_hardirqs_on);
+               return;
+       }
+
+       /*
+        * We're enabling irqs and according to our state above irqs weren't
+        * already enabled, yet we find the hardware thinks they are in fact
+        * enabled.. someone messed up their IRQ state tracing.
+        */
+       if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+               return;
+
+       /*
+        * See the fine text that goes along with this variable definition.
+        */
+       if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
+               return;
+
+       /*
+        * Can't allow enabling interrupts while in an interrupt handler,
+        * that's generally bad form and such. Recursion, limited stack etc..
+        */
+       if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
+               return;
+
+       current->lockdep_recursion = 1;
+       __trace_hardirqs_on_caller(ip);
+       current->lockdep_recursion = 0;
 }
 EXPORT_SYMBOL(trace_hardirqs_on_caller);
 
@@ -2283,6 +2617,10 @@ void trace_hardirqs_off_caller(unsigned long ip)
        if (unlikely(!debug_locks || current->lockdep_recursion))
                return;
 
+       /*
+        * So we're supposed to get called after you mask local IRQs, but for
+        * some reason the hardware doesn't quite think you did a proper job.
+        */
        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return;
 
@@ -2293,9 +2631,9 @@ void trace_hardirqs_off_caller(unsigned long ip)
                curr->hardirqs_enabled = 0;
                curr->hardirq_disable_ip = ip;
                curr->hardirq_disable_event = ++curr->irq_events;
-               debug_atomic_inc(&hardirqs_off_events);
+               debug_atomic_inc(hardirqs_off_events);
        } else
-               debug_atomic_inc(&redundant_hardirqs_off);
+               debug_atomic_inc(redundant_hardirqs_off);
 }
 EXPORT_SYMBOL(trace_hardirqs_off_caller);
 
@@ -2312,24 +2650,29 @@ void trace_softirqs_on(unsigned long ip)
 {
        struct task_struct *curr = current;
 
-       if (unlikely(!debug_locks))
+       if (unlikely(!debug_locks || current->lockdep_recursion))
                return;
 
+       /*
+        * We fancy IRQs being disabled here, see softirq.c, avoids
+        * funny state and nesting things.
+        */
        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return;
 
        if (curr->softirqs_enabled) {
-               debug_atomic_inc(&redundant_softirqs_on);
+               debug_atomic_inc(redundant_softirqs_on);
                return;
        }
 
+       current->lockdep_recursion = 1;
        /*
         * We'll do an OFF -> ON transition:
         */
        curr->softirqs_enabled = 1;
        curr->softirq_enable_ip = ip;
        curr->softirq_enable_event = ++curr->irq_events;
-       debug_atomic_inc(&softirqs_on_events);
+       debug_atomic_inc(softirqs_on_events);
        /*
         * We are going to turn softirqs on, so set the
         * usage bit for all held locks, if hardirqs are
@@ -2337,6 +2680,7 @@ void trace_softirqs_on(unsigned long ip)
         */
        if (curr->hardirqs_enabled)
                mark_held_locks(curr, SOFTIRQ);
+       current->lockdep_recursion = 0;
 }
 
 /*
@@ -2346,9 +2690,12 @@ void trace_softirqs_off(unsigned long ip)
 {
        struct task_struct *curr = current;
 
-       if (unlikely(!debug_locks))
+       if (unlikely(!debug_locks || current->lockdep_recursion))
                return;
 
+       /*
+        * We fancy IRQs being disabled here, see softirq.c
+        */
        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return;
 
@@ -2359,10 +2706,13 @@ void trace_softirqs_off(unsigned long ip)
                curr->softirqs_enabled = 0;
                curr->softirq_disable_ip = ip;
                curr->softirq_disable_event = ++curr->irq_events;
-               debug_atomic_inc(&softirqs_off_events);
+               debug_atomic_inc(softirqs_off_events);
+               /*
+                * Whoops, we wanted softirqs off, so why aren't they?
+                */
                DEBUG_LOCKS_WARN_ON(!softirq_count());
        } else
-               debug_atomic_inc(&redundant_softirqs_off);
+               debug_atomic_inc(redundant_softirqs_off);
 }
 
 static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
@@ -2384,6 +2734,9 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
        if (!(gfp_mask & __GFP_FS))
                return;
 
+       /*
+        * Oi! Can't be having __GFP_FS allocations with IRQs disabled.
+        */
        if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)))
                return;
 
@@ -2496,13 +2849,13 @@ static int separate_irq_context(struct task_struct *curr,
        return 0;
 }
 
-#else
+#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
 
 static inline
 int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
                enum lock_usage_bit new_bit)
 {
-       WARN_ON(1);
+       WARN_ON(1); /* Impossible innit? when we don't have TRACE_IRQFLAG */
        return 1;
 }
 
@@ -2522,7 +2875,7 @@ void lockdep_trace_alloc(gfp_t gfp_mask)
 {
 }
 
-#endif
+#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
 
 /*
  * Mark a lock with a usage bit, and validate the state transition:
@@ -2542,7 +2895,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
        if (!graph_lock())
                return 0;
        /*
-        * Make sure we didnt race:
+        * Make sure we didn't race:
         */
        if (unlikely(hlock_class(this)->usage_mask & new_mask)) {
                graph_unlock();
@@ -2567,7 +2920,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
                        return 0;
                break;
        case LOCK_USED:
-               debug_atomic_dec(&nr_unused_locks);
+               debug_atomic_dec(nr_unused_locks);
                break;
        default:
                if (!debug_locks_off_graph_unlock())
@@ -2597,11 +2950,20 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 void lockdep_init_map(struct lockdep_map *lock, const char *name,
                      struct lock_class_key *key, int subclass)
 {
-       lock->class_cache = NULL;
+       int i;
+
+       kmemcheck_mark_initialized(lock, sizeof(*lock));
+
+       for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++)
+               lock->class_cache[i] = NULL;
+
 #ifdef CONFIG_LOCK_STAT
        lock->cpu = raw_smp_processor_id();
 #endif
 
+       /*
+        * Can't be having no nameless bastards around this place!
+        */
        if (DEBUG_LOCKS_WARN_ON(!name)) {
                lock->name = "NULL";
                return;
@@ -2609,6 +2971,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
 
        lock->name = name;
 
+       /*
+        * No key, no joy, we need to hash something.
+        */
        if (DEBUG_LOCKS_WARN_ON(!key))
                return;
        /*
@@ -2616,6 +2981,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
         */
        if (!static_obj(key)) {
                printk("BUG: key %p not in .data!\n", key);
+               /*
+                * What it says above ^^^^^, I suggest you read it.
+                */
                DEBUG_LOCKS_WARN_ON(1);
                return;
        }
@@ -2629,19 +2997,60 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
 }
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
+struct lock_class_key __lockdep_no_validate__;
+EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
+
+/*
+ * Report a nest_lock that the acquiring task does not hold; returns 0.
+ */
+static int
+print_lock_nested_lock_not_held(struct task_struct *curr,
+                               struct held_lock *hlock,
+                               unsigned long ip)
+{
+       if (!debug_locks_off())
+               return 0;
+       if (debug_locks_silent)
+               return 0;
+
+       printk("\n");
+       printk("==================================\n");
+       printk("[ BUG: Nested lock was not taken ]\n");
+       print_kernel_ident();
+       printk("----------------------------------\n");
+
+       printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
+       print_lock(hlock);
+
+       printk("\nbut this task is not holding:\n");
+       printk("%s\n", hlock->nest_lock->name);
+
+       printk("\nother info that might help us debug this:\n");
+       lockdep_print_held_locks(curr);
+
+       printk("\nstack backtrace:\n");
+       dump_stack();
+
+       return 0;
+}
+
+static int __lock_is_held(struct lockdep_map *lock);
+
 /*
  * This gets called for every mutex_lock*()/spin_lock*() operation.
  * We maintain the dependency maps and validate the locking attempt:
  */
 static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
                          int trylock, int read, int check, int hardirqs_off,
-                         struct lockdep_map *nest_lock, unsigned long ip)
+                         struct lockdep_map *nest_lock, unsigned long ip,
+                         int references)
 {
        struct task_struct *curr = current;
        struct lock_class *class = NULL;
        struct held_lock *hlock;
        unsigned int depth, id;
        int chain_head = 0;
+       int class_idx;
        u64 chain_key;
 
        if (!prove_locking)
@@ -2650,28 +3059,28 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
        if (unlikely(!debug_locks))
                return 0;
 
+       /*
+        * Lockdep should run with IRQs disabled, otherwise we could
+        * get an interrupt which would want to take locks, which would
+        * end up in lockdep and have you got a head-ache already?
+        */
        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return 0;
 
-       if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
-               debug_locks_off();
-               printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n");
-               printk("turning off the locking correctness validator.\n");
-               dump_stack();
-               return 0;
-       }
+       if (lock->key == &__lockdep_no_validate__)
+               check = 1;
 
-       if (!subclass)
-               class = lock->class_cache;
+       if (subclass < NR_LOCKDEP_CACHING_CLASSES)
+               class = lock->class_cache[subclass];
        /*
-        * Not cached yet or subclass?
+        * Not cached?
         */
        if (unlikely(!class)) {
                class = register_lock_class(lock, subclass, 0);
                if (!class)
                        return 0;
        }
-       debug_atomic_inc((atomic_t *)&class->ops);
+       atomic_inc((atomic_t *)&class->ops);
        if (very_verbose(class)) {
                printk("\nacquire class [%p] %s", class->key, class->name);
                if (class->name_version > 1)
@@ -2686,13 +3095,34 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
         * dependency checks are done)
         */
        depth = curr->lockdep_depth;
+       /*
+        * Ran out of static storage for our per-task lock stack again have we?
+        */
        if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
                return 0;
 
+       class_idx = class - lock_classes + 1;
+
+       if (depth) {
+               hlock = curr->held_locks + depth - 1;
+               if (hlock->class_idx == class_idx && nest_lock) {
+                       if (hlock->references)
+                               hlock->references++;
+                       else
+                               hlock->references = 2;
+
+                       return 1;
+               }
+       }
+
        hlock = curr->held_locks + depth;
+       /*
+        * Plain impossible, we just registered it and checked it weren't no
+        * NULL like.. I bet this mushroom I ate was good!
+        */
        if (DEBUG_LOCKS_WARN_ON(!class))
                return 0;
-       hlock->class_idx = class - lock_classes + 1;
+       hlock->class_idx = class_idx;
        hlock->acquire_ip = ip;
        hlock->instance = lock;
        hlock->nest_lock = nest_lock;
@@ -2700,9 +3130,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
        hlock->read = read;
        hlock->check = check;
        hlock->hardirqs_off = !!hardirqs_off;
+       hlock->references = references;
 #ifdef CONFIG_LOCK_STAT
        hlock->waittime_stamp = 0;
-       hlock->holdtime_stamp = sched_clock();
+       hlock->holdtime_stamp = lockstat_clock();
 #endif
 
        if (check == 2 && !mark_irqflags(curr, hlock))
@@ -2723,11 +3154,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
         * the hash, not class->key.
         */
        id = class - lock_classes;
+       /*
+        * Whoops, we did it again.. ran straight out of our static allocation.
+        */
        if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
                return 0;
 
        chain_key = curr->curr_chain_key;
        if (!depth) {
+               /*
+                * How can we have a chain hash when we ain't got no keys?!
+                */
                if (DEBUG_LOCKS_WARN_ON(chain_key != 0))
                        return 0;
                chain_head = 1;
@@ -2740,6 +3177,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
        }
        chain_key = iterate_chain_key(chain_key, id);
 
+       if (nest_lock && !__lock_is_held(nest_lock))
+               return print_lock_nested_lock_not_held(curr, hlock, ip);
+
        if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
                return 0;
 
@@ -2752,9 +3192,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 #endif
        if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
                debug_locks_off();
-               printk("BUG: MAX_LOCK_DEPTH too low!\n");
-               printk("turning off the locking correctness validator.\n");
+               print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!");
+               printk(KERN_DEBUG "depth: %i  max: %lu!\n",
+                      curr->lockdep_depth, MAX_LOCK_DEPTH);
+
+               lockdep_print_held_locks(current);
+               debug_show_all_locks();
                dump_stack();
+
                return 0;
        }
 
@@ -2765,7 +3210,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 }
 
 static int
-print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
+print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
                           unsigned long ip)
 {
        if (!debug_locks_off())
@@ -2773,9 +3218,11 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
        if (debug_locks_silent)
                return 0;
 
-       printk("\n=====================================\n");
-       printk(  "[ BUG: bad unlock balance detected! ]\n");
-       printk(  "-------------------------------------\n");
+       printk("\n");
+       printk("=====================================\n");
+       printk("[ BUG: bad unlock balance detected! ]\n");
+       print_kernel_ident();
+       printk("-------------------------------------\n");
        printk("%s/%d is trying to release lock (",
                curr->comm, task_pid_nr(curr));
        print_lockdep_cache(lock);
@@ -2799,15 +3246,53 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
 {
        if (unlikely(!debug_locks))
                return 0;
+       /*
+        * Lockdep should run with IRQs disabled, recursion, head-ache, etc..
+        */
        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return 0;
 
        if (curr->lockdep_depth <= 0)
-               return print_unlock_inbalance_bug(curr, lock, ip);
+               return print_unlock_imbalance_bug(curr, lock, ip);
 
        return 1;
 }
 
+static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
+{
+       struct lock_class *cls;
+
+       /* Exact instance match: this entry is the lock itself. */
+       if (hlock->instance == lock)
+               return 1;
+
+       /* Only entries carrying a reference count may match by class. */
+       if (!hlock->references)
+               return 0;
+
+       cls = lock->class_cache[0];
+       if (!cls)
+               cls = look_up_lock_class(lock, 0);
+
+       /*
+        * look_up_lock_class() found nothing, so this lock has never been
+        * acquired at all; a lock that was never acquired cannot be held,
+        * so report no match.
+        */
+       if (!cls)
+               return 0;
+
+       /*
+        * A reference count without a nest_lock means the ->references
+        * bookkeeping went wrong somewhere; warn and refuse to match.
+        */
+       if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
+               return 0;
+
+       /* class_idx stores the lock_classes[] index biased by one. */
+       return hlock->class_idx == cls - lock_classes + 1;
+}
+
 static int
 __lock_set_class(struct lockdep_map *lock, const char *name,
                 struct lock_class_key *key, unsigned int subclass,
@@ -2820,6 +3305,10 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
        int i;
 
        depth = curr->lockdep_depth;
+       /*
+        * This function is about (re)setting the class of a held lock,
+        * yet we're not actually holding any locks. Naughty user!
+        */
        if (DEBUG_LOCKS_WARN_ON(!depth))
                return 0;
 
@@ -2831,11 +3320,11 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
                 */
                if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
                        break;
-               if (hlock->instance == lock)
+               if (match_held_lock(hlock, lock))
                        goto found_it;
                prev_hlock = hlock;
        }
-       return print_unlock_inbalance_bug(curr, lock, ip);
+       return print_unlock_imbalance_bug(curr, lock, ip);
 
 found_it:
        lockdep_init_map(lock, name, key, 0);
@@ -2850,10 +3339,15 @@ found_it:
                if (!__lock_acquire(hlock->instance,
                        hlock_class(hlock)->subclass, hlock->trylock,
                                hlock->read, hlock->check, hlock->hardirqs_off,
-                               hlock->nest_lock, hlock->acquire_ip))
+                               hlock->nest_lock, hlock->acquire_ip,
+                               hlock->references))
                        return 0;
        }
 
+       /*
+        * I took it apart and put it back together again, except now I have
+        * these 'spare' parts.. where shall I put them.
+        */
        if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
                return 0;
        return 1;
@@ -2878,6 +3372,10 @@ lock_release_non_nested(struct task_struct *curr,
         * of held locks:
         */
        depth = curr->lockdep_depth;
+       /*
+        * So we're all set to release this lock.. wait what lock? We don't
+        * own any locks, you've been drinking again?
+        */
        if (DEBUG_LOCKS_WARN_ON(!depth))
                return 0;
 
@@ -2889,20 +3387,34 @@ lock_release_non_nested(struct task_struct *curr,
                 */
                if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
                        break;
-               if (hlock->instance == lock)
+               if (match_held_lock(hlock, lock))
                        goto found_it;
                prev_hlock = hlock;
        }
-       return print_unlock_inbalance_bug(curr, lock, ip);
+       return print_unlock_imbalance_bug(curr, lock, ip);
 
 found_it:
-       lock_release_holdtime(hlock);
+       if (hlock->instance == lock)
+               lock_release_holdtime(hlock);
+
+       if (hlock->references) {
+               hlock->references--;
+               if (hlock->references) {
+                       /*
+                        * We had, and after removing one, still have
+                        * references, the current lock stack is still
+                        * valid. We're done!
+                        */
+                       return 1;
+               }
+       }
 
        /*
         * We have the right lock to unlock, 'hlock' points to it.
         * Now we remove it from the stack, and add back the other
         * entries (if any), recalculating the hash along the way:
         */
+
        curr->lockdep_depth = i;
        curr->curr_chain_key = hlock->prev_chain_key;
 
@@ -2911,10 +3423,15 @@ found_it:
                if (!__lock_acquire(hlock->instance,
                        hlock_class(hlock)->subclass, hlock->trylock,
                                hlock->read, hlock->check, hlock->hardirqs_off,
-                               hlock->nest_lock, hlock->acquire_ip))
+                               hlock->nest_lock, hlock->acquire_ip,
+                               hlock->references))
                        return 0;
        }
 
+       /*
+        * We had N bottles of beer on the wall, we drank one, but now
+        * there's not N-1 bottles of beer left on the wall...
+        */
        if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
                return 0;
        return 1;
@@ -2941,10 +3458,13 @@ static int lock_release_nested(struct task_struct *curr,
        /*
         * Is the unlock non-nested:
         */
-       if (hlock->instance != lock)
+       if (hlock->instance != lock || hlock->references)
                return lock_release_non_nested(curr, lock, ip);
        curr->lockdep_depth--;
 
+       /*
+        * No more locks, but somehow we've got hash left over, who left it?
+        */
        if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
                return 0;
 
@@ -2986,6 +3506,21 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
        check_chain_key(curr);
 }
 
+/* Return 1 if any entry on current's held-lock stack matches @lock. */
+static int __lock_is_held(struct lockdep_map *lock)
+{
+       struct task_struct *tsk = current;
+       int idx = 0;
+
+       while (idx < tsk->lockdep_depth) {
+               if (match_held_lock(&tsk->held_locks[idx], lock))
+                       return 1;
+               idx++;
+       }
+
+       return 0;
+}
+
 /*
  * Check whether we follow the irq-flags state precisely:
  */
@@ -3012,10 +3547,13 @@ static void check_flags(unsigned long flags)
         * check if not in hardirq contexts:
         */
        if (!hardirq_count()) {
-               if (softirq_count())
+               if (softirq_count()) {
+                       /* like the above, but with softirqs */
                        DEBUG_LOCKS_WARN_ON(current->softirqs_enabled);
-               else
+               } else {
+                       /* lick the above, does it taste good? */
                        DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
+               }
        }
 
        if (!debug_locks)
@@ -3052,8 +3590,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 {
        unsigned long flags;
 
-       trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
-
        if (unlikely(current->lockdep_recursion))
                return;
 
@@ -3061,8 +3597,9 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
        check_flags(flags);
 
        current->lockdep_recursion = 1;
+       trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
        __lock_acquire(lock, subclass, trylock, read, check,
-                      irqs_disabled_flags(flags), nest_lock, ip);
+                      irqs_disabled_flags(flags), nest_lock, ip, 0);
        current->lockdep_recursion = 0;
        raw_local_irq_restore(flags);
 }
@@ -3073,20 +3610,39 @@ void lock_release(struct lockdep_map *lock, int nested,
 {
        unsigned long flags;
 
-       trace_lock_release(lock, nested, ip);
-
        if (unlikely(current->lockdep_recursion))
                return;
 
        raw_local_irq_save(flags);
        check_flags(flags);
        current->lockdep_recursion = 1;
+       trace_lock_release(lock, ip);
        __lock_release(lock, nested, ip);
        current->lockdep_recursion = 0;
        raw_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(lock_release);
 
+/* Exported query: does the current task hold @lock according to lockdep? */
+int lock_is_held(struct lockdep_map *lock)
+{
+       unsigned long flags;
+       int held;
+
+       /* In lockdep recursion: return 1 to avoid false-negative asserts. */
+       if (unlikely(current->lockdep_recursion))
+               return 1;
+
+       raw_local_irq_save(flags);
+       check_flags(flags);
+       current->lockdep_recursion = 1;
+       held = __lock_is_held(lock);
+       current->lockdep_recursion = 0;
+       raw_local_irq_restore(flags);
+       return held;
+}
+EXPORT_SYMBOL_GPL(lock_is_held);
+
 void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
 {
        current->lockdep_reclaim_gfp = gfp_mask;
@@ -3107,9 +3663,11 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
        if (debug_locks_silent)
                return 0;
 
-       printk("\n=================================\n");
-       printk(  "[ BUG: bad contention detected! ]\n");
-       printk(  "---------------------------------\n");
+       printk("\n");
+       printk("=================================\n");
+       printk("[ BUG: bad contention detected! ]\n");
+       print_kernel_ident();
+       printk("---------------------------------\n");
        printk("%s/%d is trying to contend lock (",
                curr->comm, task_pid_nr(curr));
        print_lockdep_cache(lock);
@@ -3135,6 +3693,10 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
        int i, contention_point, contending_point;
 
        depth = curr->lockdep_depth;
+       /*
+        * Whee, we contended on this lock, except it seems we're not
+        * actually trying to acquire anything much at all..
+        */
        if (DEBUG_LOCKS_WARN_ON(!depth))
                return;
 
@@ -3146,7 +3708,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
                 */
                if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
                        break;
-               if (hlock->instance == lock)
+               if (match_held_lock(hlock, lock))
                        goto found_it;
                prev_hlock = hlock;
        }
@@ -3154,7 +3716,10 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
        return;
 
 found_it:
-       hlock->waittime_stamp = sched_clock();
+       if (hlock->instance != lock)
+               return;
+
+       hlock->waittime_stamp = lockstat_clock();
 
        contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
        contending_point = lock_point(hlock_class(hlock)->contending_point,
@@ -3177,11 +3742,14 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
        struct held_lock *hlock, *prev_hlock;
        struct lock_class_stats *stats;
        unsigned int depth;
-       u64 now;
-       s64 waittime = 0;
+       u64 now, waittime = 0;
        int i, cpu;
 
        depth = curr->lockdep_depth;
+       /*
+        * Yay, we acquired ownership of this lock we didn't try to
+        * acquire, how the heck did that happen?
+        */
        if (DEBUG_LOCKS_WARN_ON(!depth))
                return;
 
@@ -3193,7 +3761,7 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
                 */
                if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
                        break;
-               if (hlock->instance == lock)
+               if (match_held_lock(hlock, lock))
                        goto found_it;
                prev_hlock = hlock;
        }
@@ -3201,14 +3769,17 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
        return;
 
 found_it:
+       if (hlock->instance != lock)
+               return;
+
        cpu = smp_processor_id();
        if (hlock->waittime_stamp) {
-               now = sched_clock();
+               now = lockstat_clock();
                waittime = now - hlock->waittime_stamp;
                hlock->holdtime_stamp = now;
        }
 
-       trace_lock_acquired(lock, ip, waittime);
+       trace_lock_acquired(lock, ip);
 
        stats = get_lock_stats(hlock_class(hlock));
        if (waittime) {
@@ -3229,8 +3800,6 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
        unsigned long flags;
 
-       trace_lock_contended(lock, ip);
-
        if (unlikely(!lock_stat))
                return;
 
@@ -3240,6 +3809,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
        raw_local_irq_save(flags);
        check_flags(flags);
        current->lockdep_recursion = 1;
+       trace_lock_contended(lock, ip);
        __lock_contended(lock, ip);
        current->lockdep_recursion = 0;
        raw_local_irq_restore(flags);
@@ -3378,9 +3948,18 @@ void lockdep_reset_lock(struct lockdep_map *lock)
                if (list_empty(head))
                        continue;
                list_for_each_entry_safe(class, next, head, hash_entry) {
-                       if (unlikely(class == lock->class_cache)) {
-                               if (debug_locks_off_graph_unlock())
+                       int match = 0;
+
+                       for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
+                               match |= class == lock->class_cache[j];
+
+                       if (unlikely(match)) {
+                               if (debug_locks_off_graph_unlock()) {
+                                       /*
+                                        * We all just reset everything, how did it match?
+                                        */
                                        WARN_ON(1);
+                               }
                                goto out_restore;
                        }
                }
@@ -3431,14 +4010,20 @@ void __init lockdep_info(void)
                sizeof(struct list_head) * CLASSHASH_SIZE +
                sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
                sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
-               sizeof(struct list_head) * CHAINHASH_SIZE) / 1024);
+               sizeof(struct list_head) * CHAINHASH_SIZE
+#ifdef CONFIG_PROVE_LOCKING
+               + sizeof(struct circular_queue)
+#endif
+               ) / 1024
+               );
 
        printk(" per task-struct memory footprint: %lu bytes\n",
                sizeof(struct held_lock) * MAX_LOCK_DEPTH);
 
 #ifdef CONFIG_DEBUG_LOCKDEP
        if (lockdep_init_error) {
-               printk("WARNING: lockdep init error! Arch code didn't call lockdep_init() early enough?\n");
+               printk("WARNING: lockdep init error! lock-%s was acquired"
+                       "before lockdep_init\n", lock_init_error);
                printk("Call stack leading to lockdep invocation was:\n");
                print_stack_trace(&lockdep_init_trace, 0);
        }
@@ -3454,9 +4039,11 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
        if (debug_locks_silent)
                return;
 
-       printk("\n=========================\n");
-       printk(  "[ BUG: held lock freed! ]\n");
-       printk(  "-------------------------\n");
+       printk("\n");
+       printk("=========================\n");
+       printk("[ BUG: held lock freed! ]\n");
+       print_kernel_ident();
+       printk("-------------------------\n");
        printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
                curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
        print_lock(hlock);
@@ -3503,29 +4090,30 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
 }
 EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
 
-static void print_held_locks_bug(struct task_struct *curr)
+static void print_held_locks_bug(void)
 {
        if (!debug_locks_off())
                return;
        if (debug_locks_silent)
                return;
 
-       printk("\n=====================================\n");
-       printk(  "[ BUG: lock held at task exit time! ]\n");
-       printk(  "-------------------------------------\n");
-       printk("%s/%d is exiting with locks still held!\n",
-               curr->comm, task_pid_nr(curr));
-       lockdep_print_held_locks(curr);
-
+       printk("\n");
+       printk("=====================================\n");
+       printk("[ BUG: %s/%d still has locks held! ]\n",
+              current->comm, task_pid_nr(current));
+       print_kernel_ident();
+       printk("-------------------------------------\n");
+       lockdep_print_held_locks(current);
        printk("\nstack backtrace:\n");
        dump_stack();
 }
 
-void debug_check_no_locks_held(struct task_struct *task)
+void debug_check_no_locks_held(void)
 {
-       if (unlikely(task->lockdep_depth > 0))
-               print_held_locks_bug(task);
+       if (unlikely(current->lockdep_depth > 0))
+               print_held_locks_bug();
 }
+EXPORT_SYMBOL_GPL(debug_check_no_locks_held);
 
 void debug_show_all_locks(void)
 {
@@ -3589,7 +4177,7 @@ EXPORT_SYMBOL_GPL(debug_show_all_locks);
  * Careful: only use this function if you are sure that
  * the task cannot run in parallel!
  */
-void __debug_show_held_locks(struct task_struct *task)
+void debug_show_held_locks(struct task_struct *task)
 {
        if (unlikely(!debug_locks)) {
                printk("INFO: lockdep is turned off.\n");
@@ -3597,12 +4185,6 @@ void __debug_show_held_locks(struct task_struct *task)
        }
        lockdep_print_held_locks(task);
 }
-EXPORT_SYMBOL_GPL(__debug_show_held_locks);
-
-void debug_show_held_locks(struct task_struct *task)
-{
-               __debug_show_held_locks(task);
-}
 EXPORT_SYMBOL_GPL(debug_show_held_locks);
 
 void lockdep_sys_exit(void)
@@ -3612,11 +4194,64 @@ void lockdep_sys_exit(void)
        if (unlikely(curr->lockdep_depth)) {
                if (!debug_locks_off())
                        return;
-               printk("\n================================================\n");
-               printk(  "[ BUG: lock held when returning to user space! ]\n");
-               printk(  "------------------------------------------------\n");
+               printk("\n");
+               printk("================================================\n");
+               printk("[ BUG: lock held when returning to user space! ]\n");
+               print_kernel_ident();
+               printk("------------------------------------------------\n");
                printk("%s/%d is leaving the kernel with locks still held!\n",
                                curr->comm, curr->pid);
                lockdep_print_held_locks(curr);
        }
 }
+
+void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
+{
+       struct task_struct *curr = current;
+
+#ifndef CONFIG_PROVE_RCU_REPEATEDLY
+       if (!debug_locks_off())
+               return;
+#endif /* #ifndef CONFIG_PROVE_RCU_REPEATEDLY */
+       /* Note: the following can be executed concurrently, so be careful. */
+       printk("\n");
+       printk("===============================\n");
+       printk("[ INFO: suspicious RCU usage. ]\n");
+       print_kernel_ident();
+       printk("-------------------------------\n");
+       printk("%s:%d %s!\n", file, line, s);
+       printk("\nother info that might help us debug this:\n\n");
+       printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
+              !rcu_lockdep_current_cpu_online()
+                       ? "RCU used illegally from offline CPU!\n"
+                       : rcu_is_cpu_idle()
+                               ? "RCU used illegally from idle CPU!\n"
+                               : "",
+              rcu_scheduler_active, debug_locks);
+
+       /*
+        * If a CPU is in the RCU-free window in idle (i.e. in the section
+        * between rcu_idle_enter() and rcu_idle_exit()), then RCU
+        * considers that CPU to be in an "extended quiescent state",
+        * which means that RCU will be completely ignoring that CPU.
+        * Therefore, rcu_read_lock() and friends have absolutely no
+        * effect on a CPU running in that state. In other words, even if
+        * such an RCU-idle CPU has called rcu_read_lock(), RCU might well
+        * delete data structures out from under it.  RCU really has no
+        * choice here: we need to keep an RCU-free window in idle where
+        * the CPU may possibly enter into low power mode. This way we can
+        * report an extended quiescent state to other CPUs that started a grace
+        * period. Otherwise we would delay any grace period as long as we run
+        * in the idle task.
+        *
+        * So complain bitterly if someone does call rcu_read_lock(),
+        * rcu_read_lock_bh() and so on from extended quiescent states.
+        */
+       if (rcu_is_cpu_idle())
+               printk("RCU used illegally from extended quiescent state!\n");
+
+       lockdep_print_held_locks(curr);
+       printk("\nstack backtrace:\n");
+       dump_stack();
+}
+EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);