Merge commit 'v2.6.28' into core/core
Ingo Molnar [Thu, 25 Dec 2008 12:51:46 +0000 (13:51 +0100)]
28 files changed:
Documentation/lockstat.txt
arch/um/include/asm/system.h
arch/x86/include/asm/uaccess.h
arch/x86/include/asm/uaccess_32.h
arch/x86/include/asm/uaccess_64.h
arch/x86/lib/usercopy_32.c
arch/x86/lib/usercopy_64.c
include/linux/debug_locks.h
include/linux/futex.h
include/linux/kernel.h
include/linux/lockdep.h
include/linux/mutex.h
include/linux/rcuclassic.h
include/linux/uaccess.h
kernel/exit.c
kernel/extable.c
kernel/futex.c
kernel/lockdep.c
kernel/lockdep_proc.c
kernel/mutex.c
kernel/notifier.c
kernel/posix-cpu-timers.c
kernel/rcuclassic.c
kernel/sched.c
kernel/softlockup.c
kernel/sys.c
lib/Kconfig.debug
mm/memory.c

index 4ba4664..9cb9138 100644 (file)
@@ -71,35 +71,50 @@ Look at the current lock statistics:
 
 # less /proc/lock_stat
 
-01 lock_stat version 0.2
+01 lock_stat version 0.3
 02 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 03                               class name    con-bounces    contentions   waittime-min   waittime-max waittime-total    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total
 04 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 05
-06               &inode->i_data.tree_lock-W:            15          21657           0.18     1093295.30 11547131054.85             58          10415           0.16          87.51        6387.60
-07               &inode->i_data.tree_lock-R:             0              0           0.00           0.00           0.00          23302         231198           0.25           8.45       98023.38
-08               --------------------------
-09                 &inode->i_data.tree_lock              0          [<ffffffff8027c08f>] add_to_page_cache+0x5f/0x190
-10
-11 ...............................................................................................................................................................................................
-12
-13                              dcache_lock:          1037           1161           0.38          45.32         774.51           6611         243371           0.15         306.48       77387.24
-14                              -----------
-15                              dcache_lock            180          [<ffffffff802c0d7e>] sys_getcwd+0x11e/0x230
-16                              dcache_lock            165          [<ffffffff802c002a>] d_alloc+0x15a/0x210
-17                              dcache_lock             33          [<ffffffff8035818d>] _atomic_dec_and_lock+0x4d/0x70
-18                              dcache_lock              1          [<ffffffff802beef8>] shrink_dcache_parent+0x18/0x130
+06                          &mm->mmap_sem-W:           233            538 18446744073708       22924.27      607243.51           1342          45806           1.71        8595.89     1180582.34
+07                          &mm->mmap_sem-R:           205            587 18446744073708       28403.36      731975.00           1940         412426           0.58      187825.45     6307502.88
+08                          ---------------
+09                            &mm->mmap_sem            487          [<ffffffff8053491f>] do_page_fault+0x466/0x928
+10                            &mm->mmap_sem            179          [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
+11                            &mm->mmap_sem            279          [<ffffffff80210a57>] sys_mmap+0x75/0xce
+12                            &mm->mmap_sem             76          [<ffffffff802a490b>] sys_munmap+0x32/0x59
+13                          ---------------
+14                            &mm->mmap_sem            270          [<ffffffff80210a57>] sys_mmap+0x75/0xce
+15                            &mm->mmap_sem            431          [<ffffffff8053491f>] do_page_fault+0x466/0x928
+16                            &mm->mmap_sem            138          [<ffffffff802a490b>] sys_munmap+0x32/0x59
+17                            &mm->mmap_sem            145          [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
+18
+19 ...............................................................................................................................................................................................
+20
+21                              dcache_lock:           621            623           0.52         118.26        1053.02           6745          91930           0.29         316.29      118423.41
+22                              -----------
+23                              dcache_lock            179          [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
+24                              dcache_lock            113          [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
+25                              dcache_lock             99          [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
+26                              dcache_lock            104          [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
+27                              -----------
+28                              dcache_lock            192          [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
+29                              dcache_lock             98          [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
+30                              dcache_lock             72          [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
+31                              dcache_lock            112          [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
 
 This excerpt shows the first two lock class statistics. Line 01 shows the
 output version - each time the format changes this will be updated. Line 02-04
-show the header with column descriptions. Lines 05-10 and 13-18 show the actual
+show the header with column descriptions. Lines 05-18 and 20-31 show the actual
 statistics. These statistics come in two parts; the actual stats separated by a
-short separator (line 08, 14) from the contention points.
+short separator (line 08, 13) from the contention points.
 
-The first lock (05-10) is a read/write lock, and shows two lines above the
+The first lock (05-18) is a read/write lock, and shows two lines above the
 short separator. The contention points don't match the column descriptors,
-they have two: contentions and [<IP>] symbol.
+they have two: contentions and [<IP>] symbol. The second set of contention
+points are the points we're contending with.
 
+The integer part of the time values is in us.
 
 View the top contending locks:
 
index 753346e..ae5f94d 100644 (file)
@@ -11,21 +11,21 @@ extern int get_signals(void);
 extern void block_signals(void);
 extern void unblock_signals(void);
 
-#define local_save_flags(flags) do { typecheck(unsigned long, flags); \
+#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \
                                     (flags) = get_signals(); } while(0)
-#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \
+#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \
                                      set_signals(flags); } while(0)
 
-#define local_irq_save(flags) do { local_save_flags(flags); \
-                                   local_irq_disable(); } while(0)
+#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \
+                                   raw_local_irq_disable(); } while(0)
 
-#define local_irq_enable() unblock_signals()
-#define local_irq_disable() block_signals()
+#define raw_local_irq_enable() unblock_signals()
+#define raw_local_irq_disable() block_signals()
 
 #define irqs_disabled()                 \
 ({                                      \
         unsigned long flags;            \
-        local_save_flags(flags);        \
+        raw_local_save_flags(flags);        \
         (flags == 0);                   \
 })
 
index 35c5492..99192bb 100644 (file)
@@ -157,6 +157,7 @@ extern int __get_user_bad(void);
        int __ret_gu;                                                   \
        unsigned long __val_gu;                                         \
        __chk_user_ptr(ptr);                                            \
+       might_fault();                                                  \
        switch (sizeof(*(ptr))) {                                       \
        case 1:                                                         \
                __get_user_x(1, __ret_gu, __val_gu, ptr);               \
@@ -241,6 +242,7 @@ extern void __put_user_8(void);
        int __ret_pu;                                           \
        __typeof__(*(ptr)) __pu_val;                            \
        __chk_user_ptr(ptr);                                    \
+       might_fault();                                          \
        __pu_val = x;                                           \
        switch (sizeof(*(ptr))) {                               \
        case 1:                                                 \
index d095a3a..5e06259 100644 (file)
@@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 static __always_inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-       might_sleep();
-       return __copy_to_user_inatomic(to, from, n);
+       might_fault();
+       return __copy_to_user_inatomic(to, from, n);
 }
 
 static __always_inline unsigned long
@@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-       might_sleep();
+       might_fault();
        if (__builtin_constant_p(n)) {
                unsigned long ret;
 
@@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long __copy_from_user_nocache(void *to,
                                const void __user *from, unsigned long n)
 {
-       might_sleep();
+       might_fault();
        if (__builtin_constant_p(n)) {
                unsigned long ret;
 
index f8cfd00..84210c4 100644 (file)
@@ -29,6 +29,8 @@ static __always_inline __must_check
 int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
        int ret = 0;
+
+       might_fault();
        if (!__builtin_constant_p(size))
                return copy_user_generic(dst, (__force void *)src, size);
        switch (size) {
@@ -71,6 +73,8 @@ static __always_inline __must_check
 int __copy_to_user(void __user *dst, const void *src, unsigned size)
 {
        int ret = 0;
+
+       might_fault();
        if (!__builtin_constant_p(size))
                return copy_user_generic((__force void *)dst, src, size);
        switch (size) {
@@ -113,6 +117,8 @@ static __always_inline __must_check
 int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 {
        int ret = 0;
+
+       might_fault();
        if (!__builtin_constant_p(size))
                return copy_user_generic((__force void *)dst,
                                         (__force void *)src, size);
index 9e68075..4a20b2f 100644 (file)
@@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
 #define __do_strncpy_from_user(dst, src, count, res)                      \
 do {                                                                      \
        int __d0, __d1, __d2;                                              \
-       might_sleep();                                                     \
+       might_fault();                                                     \
        __asm__ __volatile__(                                              \
                "       testl %1,%1\n"                                     \
                "       jz 2f\n"                                           \
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user);
 #define __do_clear_user(addr,size)                                     \
 do {                                                                   \
        int __d0;                                                       \
-       might_sleep();                                                  \
+       might_fault();                                                  \
        __asm__ __volatile__(                                           \
                "0:     rep; stosl\n"                                   \
                "       movl %2,%0\n"                                   \
@@ -155,7 +155,7 @@ do {                                                                        \
 unsigned long
 clear_user(void __user *to, unsigned long n)
 {
-       might_sleep();
+       might_fault();
        if (access_ok(VERIFY_WRITE, to, n))
                __do_clear_user(to, n);
        return n;
@@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n)
        unsigned long mask = -__addr_ok(s);
        unsigned long res, tmp;
 
-       might_sleep();
+       might_fault();
 
        __asm__ __volatile__(
                "       testl %0, %0\n"
index f4df6e7..64d6c84 100644 (file)
@@ -15,7 +15,7 @@
 #define __do_strncpy_from_user(dst,src,count,res)                         \
 do {                                                                      \
        long __d0, __d1, __d2;                                             \
-       might_sleep();                                                     \
+       might_fault();                                                     \
        __asm__ __volatile__(                                              \
                "       testq %1,%1\n"                                     \
                "       jz 2f\n"                                           \
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
 unsigned long __clear_user(void __user *addr, unsigned long size)
 {
        long __d0;
-       might_sleep();
+       might_fault();
        /* no memory constraint because it doesn't change any memory gcc knows
           about */
        asm volatile(
index 4aaa4af..096476f 100644 (file)
@@ -17,7 +17,7 @@ extern int debug_locks_off(void);
 ({                                                                     \
        int __ret = 0;                                                  \
                                                                        \
-       if (unlikely(c)) {                                              \
+       if (!oops_in_progress && unlikely(c)) {                         \
                if (debug_locks_off() && !debug_locks_silent)           \
                        WARN_ON(1);                                     \
                __ret = 1;                                              \
index 586ab56..8f627b9 100644 (file)
@@ -164,6 +164,8 @@ union futex_key {
        } both;
 };
 
+#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
+
 #ifdef CONFIG_FUTEX
 extern void exit_robust_list(struct task_struct *curr);
 extern void exit_pi_state_list(struct task_struct *curr);
index dc7e0d0..269df5a 100644 (file)
@@ -141,6 +141,15 @@ extern int _cond_resched(void);
                (__x < 0) ? -__x : __x;         \
        })
 
+#ifdef CONFIG_PROVE_LOCKING
+void might_fault(void);
+#else
+static inline void might_fault(void)
+{
+       might_sleep();
+}
+#endif
+
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(long time);
 NORET_TYPE void panic(const char * fmt, ...)
@@ -188,6 +197,8 @@ extern unsigned long long memparse(const char *ptr, char **retptr);
 extern int core_kernel_text(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
 extern int kernel_text_address(unsigned long addr);
+extern int func_ptr_is_kernel_text(void *ptr);
+
 struct pid;
 extern struct pid *session_of_pgrp(struct pid *pgrp);
 
index 29aec6e..8956daf 100644 (file)
@@ -73,6 +73,8 @@ struct lock_class_key {
        struct lockdep_subclass_key     subkeys[MAX_LOCKDEP_SUBCLASSES];
 };
 
+#define LOCKSTAT_POINTS                4
+
 /*
  * The lock-class itself:
  */
@@ -119,7 +121,8 @@ struct lock_class {
        int                             name_version;
 
 #ifdef CONFIG_LOCK_STAT
-       unsigned long                   contention_point[4];
+       unsigned long                   contention_point[LOCKSTAT_POINTS];
+       unsigned long                   contending_point[LOCKSTAT_POINTS];
 #endif
 };
 
@@ -144,6 +147,7 @@ enum bounce_type {
 
 struct lock_class_stats {
        unsigned long                   contention_point[4];
+       unsigned long                   contending_point[4];
        struct lock_time                read_waittime;
        struct lock_time                write_waittime;
        struct lock_time                read_holdtime;
@@ -165,6 +169,7 @@ struct lockdep_map {
        const char                      *name;
 #ifdef CONFIG_LOCK_STAT
        int                             cpu;
+       unsigned long                   ip;
 #endif
 };
 
@@ -356,7 +361,7 @@ struct lock_class_key { };
 #ifdef CONFIG_LOCK_STAT
 
 extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
-extern void lock_acquired(struct lockdep_map *lock);
+extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
 
 #define LOCK_CONTENDED(_lock, try, lock)                       \
 do {                                                           \
@@ -364,13 +369,13 @@ do {                                                              \
                lock_contended(&(_lock)->dep_map, _RET_IP_);    \
                lock(_lock);                                    \
        }                                                       \
-       lock_acquired(&(_lock)->dep_map);                       \
+       lock_acquired(&(_lock)->dep_map, _RET_IP_);                     \
 } while (0)
 
 #else /* CONFIG_LOCK_STAT */
 
 #define lock_contended(lockdep_map, ip) do {} while (0)
-#define lock_acquired(lockdep_map) do {} while (0)
+#define lock_acquired(lockdep_map, ip) do {} while (0)
 
 #define LOCK_CONTENDED(_lock, try, lock) \
        lock(_lock)
@@ -481,4 +486,22 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 # define lock_map_release(l)                   do { } while (0)
 #endif
 
+#ifdef CONFIG_PROVE_LOCKING
+# define might_lock(lock)                                              \
+do {                                                                   \
+       typecheck(struct lockdep_map *, &(lock)->dep_map);              \
+       lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_);    \
+       lock_release(&(lock)->dep_map, 0, _THIS_IP_);                   \
+} while (0)
+# define might_lock_read(lock)                                                 \
+do {                                                                   \
+       typecheck(struct lockdep_map *, &(lock)->dep_map);              \
+       lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_);    \
+       lock_release(&(lock)->dep_map, 0, _THIS_IP_);                   \
+} while (0)
+#else
+# define might_lock(lock) do { } while (0)
+# define might_lock_read(lock) do { } while (0)
+#endif
+
 #endif /* __LINUX_LOCKDEP_H */
index bc6da10..7a0e5c4 100644 (file)
@@ -144,6 +144,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
 /*
  * NOTE: mutex_trylock() follows the spin_trylock() convention,
  *       not the down_trylock() convention!
+ *
+ * Returns 1 if the mutex has been acquired successfully, and 0 on contention.
  */
 extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
index 5f89b62..301dda8 100644 (file)
@@ -41,7 +41,7 @@
 #include <linux/seqlock.h>
 
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-#define RCU_SECONDS_TILL_STALL_CHECK   ( 3 * HZ) /* for rcp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_CHECK   (10 * HZ) /* for rcp->jiffies_stall */
 #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
index fec6dec..6b58367 100644 (file)
@@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to,
                                                        \
                set_fs(KERNEL_DS);                      \
                pagefault_disable();                    \
-               ret = __get_user(retval, (__force typeof(retval) __user *)(addr));              \
+               ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval));            \
                pagefault_enable();                     \
                set_fs(old_fs);                         \
                ret;                                    \
index 2d8be7e..30fcdf1 100644 (file)
@@ -1321,10 +1321,10 @@ static int wait_task_zombie(struct task_struct *p, int options,
                 * group, which consolidates times for all threads in the
                 * group including the group leader.
                 */
+               thread_group_cputime(p, &cputime);
                spin_lock_irq(&p->parent->sighand->siglock);
                psig = p->parent->signal;
                sig = p->signal;
-               thread_group_cputime(p, &cputime);
                psig->cutime =
                        cputime_add(psig->cutime,
                        cputime_add(cputime.utime,
index a26cb2e..adf0cc9 100644 (file)
@@ -66,3 +66,19 @@ int kernel_text_address(unsigned long addr)
                return 1;
        return module_text_address(addr) != NULL;
 }
+
+/*
+ * On some architectures (PPC64, IA64) function pointers
+ * are actually only tokens to some data that then holds the
+ * real function address. As a result, to find if a function
+ * pointer is part of the kernel text, we need to do some
+ * special dereferencing first.
+ */
+int func_ptr_is_kernel_text(void *ptr)
+{
+       unsigned long addr;
+       addr = (unsigned long) dereference_function_descriptor(ptr);
+       if (core_kernel_text(addr))
+               return 1;
+       return module_text_address(addr) != NULL;
+}
index 8af1002..e10c5c8 100644 (file)
@@ -123,24 +123,6 @@ struct futex_hash_bucket {
 static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
 
 /*
- * Take mm->mmap_sem, when futex is shared
- */
-static inline void futex_lock_mm(struct rw_semaphore *fshared)
-{
-       if (fshared)
-               down_read(fshared);
-}
-
-/*
- * Release mm->mmap_sem, when the futex is shared
- */
-static inline void futex_unlock_mm(struct rw_semaphore *fshared)
-{
-       if (fshared)
-               up_read(fshared);
-}
-
-/*
  * We hash on the keys returned from get_futex_key (see below).
  */
 static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -161,6 +143,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
                && key1->both.offset == key2->both.offset);
 }
 
+/*
+ * Take a reference to the resource addressed by a key.
+ * Can be called while holding spinlocks.
+ *
+ */
+static void get_futex_key_refs(union futex_key *key)
+{
+       if (!key->both.ptr)
+               return;
+
+       switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+       case FUT_OFF_INODE:
+               atomic_inc(&key->shared.inode->i_count);
+               break;
+       case FUT_OFF_MMSHARED:
+               atomic_inc(&key->private.mm->mm_count);
+               break;
+       }
+}
+
+/*
+ * Drop a reference to the resource addressed by a key.
+ * The hash bucket spinlock must not be held.
+ */
+static void drop_futex_key_refs(union futex_key *key)
+{
+       if (!key->both.ptr)
+               return;
+
+       switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+       case FUT_OFF_INODE:
+               iput(key->shared.inode);
+               break;
+       case FUT_OFF_MMSHARED:
+               mmdrop(key->private.mm);
+               break;
+       }
+}
+
 /**
  * get_futex_key - Get parameters which are the keys for a futex.
  * @uaddr: virtual address of the futex
@@ -179,12 +200,10 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
  * For other futexes, it points to &current->mm->mmap_sem and
  * caller must have taken the reader lock. but NOT any spinlocks.
  */
-static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
-                        union futex_key *key)
+static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
 {
        unsigned long address = (unsigned long)uaddr;
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
        struct page *page;
        int err;
 
@@ -208,100 +227,50 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
                        return -EFAULT;
                key->private.mm = mm;
                key->private.address = address;
+               get_futex_key_refs(key);
                return 0;
        }
-       /*
-        * The futex is hashed differently depending on whether
-        * it's in a shared or private mapping.  So check vma first.
-        */
-       vma = find_extend_vma(mm, address);
-       if (unlikely(!vma))
-               return -EFAULT;
 
-       /*
-        * Permissions.
-        */
-       if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
-               return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
+again:
+       err = get_user_pages_fast(address, 1, 0, &page);
+       if (err < 0)
+               return err;
+
+       lock_page(page);
+       if (!page->mapping) {
+               unlock_page(page);
+               put_page(page);
+               goto again;
+       }
 
        /*
         * Private mappings are handled in a simple way.
         *
         * NOTE: When userspace waits on a MAP_SHARED mapping, even if
         * it's a read-only handle, it's expected that futexes attach to
-        * the object not the particular process.  Therefore we use
-        * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
-        * mappings of _writable_ handles.
+        * the object not the particular process.
         */
-       if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
-               key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
+       if (PageAnon(page)) {
+               key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
                key->private.mm = mm;
                key->private.address = address;
-               return 0;
+       } else {
+               key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+               key->shared.inode = page->mapping->host;
+               key->shared.pgoff = page->index;
        }
 
-       /*
-        * Linear file mappings are also simple.
-        */
-       key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
-       key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
-       if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
-               key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
-                                    + vma->vm_pgoff);
-               return 0;
-       }
+       get_futex_key_refs(key);
 
-       /*
-        * We could walk the page table to read the non-linear
-        * pte, and get the page index without fetching the page
-        * from swap.  But that's a lot of code to duplicate here
-        * for a rare case, so we simply fetch the page.
-        */
-       err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
-       if (err >= 0) {
-               key->shared.pgoff =
-                       page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-               put_page(page);
-               return 0;
-       }
-       return err;
-}
-
-/*
- * Take a reference to the resource addressed by a key.
- * Can be called while holding spinlocks.
- *
- */
-static void get_futex_key_refs(union futex_key *key)
-{
-       if (key->both.ptr == NULL)
-               return;
-       switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-               case FUT_OFF_INODE:
-                       atomic_inc(&key->shared.inode->i_count);
-                       break;
-               case FUT_OFF_MMSHARED:
-                       atomic_inc(&key->private.mm->mm_count);
-                       break;
-       }
+       unlock_page(page);
+       put_page(page);
+       return 0;
 }
 
-/*
- * Drop a reference to the resource addressed by a key.
- * The hash bucket spinlock must not be held.
- */
-static void drop_futex_key_refs(union futex_key *key)
+static inline
+void put_futex_key(int fshared, union futex_key *key)
 {
-       if (!key->both.ptr)
-               return;
-       switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-               case FUT_OFF_INODE:
-                       iput(key->shared.inode);
-                       break;
-               case FUT_OFF_MMSHARED:
-                       mmdrop(key->private.mm);
-                       break;
-       }
+       drop_futex_key_refs(key);
 }
 
 static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@ -328,10 +297,8 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from)
 
 /*
  * Fault handling.
- * if fshared is non NULL, current->mm->mmap_sem is already held
  */
-static int futex_handle_fault(unsigned long address,
-                             struct rw_semaphore *fshared, int attempt)
+static int futex_handle_fault(unsigned long address, int attempt)
 {
        struct vm_area_struct * vma;
        struct mm_struct *mm = current->mm;
@@ -340,8 +307,7 @@ static int futex_handle_fault(unsigned long address,
        if (attempt > 2)
                return ret;
 
-       if (!fshared)
-               down_read(&mm->mmap_sem);
+       down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
        if (vma && address >= vma->vm_start &&
            (vma->vm_flags & VM_WRITE)) {
@@ -361,8 +327,7 @@ static int futex_handle_fault(unsigned long address,
                                current->min_flt++;
                }
        }
-       if (!fshared)
-               up_read(&mm->mmap_sem);
+       up_read(&mm->mmap_sem);
        return ret;
 }
 
@@ -385,6 +350,7 @@ static int refill_pi_state_cache(void)
        /* pi_mutex gets initialized later */
        pi_state->owner = NULL;
        atomic_set(&pi_state->refcount, 1);
+       pi_state->key = FUTEX_KEY_INIT;
 
        current->pi_state_cache = pi_state;
 
@@ -462,7 +428,7 @@ void exit_pi_state_list(struct task_struct *curr)
        struct list_head *next, *head = &curr->pi_state_list;
        struct futex_pi_state *pi_state;
        struct futex_hash_bucket *hb;
-       union futex_key key;
+       union futex_key key = FUTEX_KEY_INIT;
 
        if (!futex_cmpxchg_enabled)
                return;
@@ -719,20 +685,17 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
  * Wake up all waiters hashed on the physical page that is mapped
  * to this virtual address:
  */
-static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
-                     int nr_wake, u32 bitset)
+static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
 {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
        struct plist_head *head;
-       union futex_key key;
+       union futex_key key = FUTEX_KEY_INIT;
        int ret;
 
        if (!bitset)
                return -EINVAL;
 
-       futex_lock_mm(fshared);
-
        ret = get_futex_key(uaddr, fshared, &key);
        if (unlikely(ret != 0))
                goto out;
@@ -760,7 +723,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
 
        spin_unlock(&hb->lock);
 out:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &key);
        return ret;
 }
 
@@ -769,19 +732,16 @@ out:
  * to this virtual address:
  */
 static int
-futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
-             u32 __user *uaddr2,
+futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
              int nr_wake, int nr_wake2, int op)
 {
-       union futex_key key1, key2;
+       union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        struct futex_hash_bucket *hb1, *hb2;
        struct plist_head *head;
        struct futex_q *this, *next;
        int ret, op_ret, attempt = 0;
 
 retryfull:
-       futex_lock_mm(fshared);
-
        ret = get_futex_key(uaddr1, fshared, &key1);
        if (unlikely(ret != 0))
                goto out;
@@ -826,18 +786,12 @@ retry:
                 */
                if (attempt++) {
                        ret = futex_handle_fault((unsigned long)uaddr2,
-                                                fshared, attempt);
+                                                attempt);
                        if (ret)
                                goto out;
                        goto retry;
                }
 
-               /*
-                * If we would have faulted, release mmap_sem,
-                * fault it in and start all over again.
-                */
-               futex_unlock_mm(fshared);
-
                ret = get_user(dummy, uaddr2);
                if (ret)
                        return ret;
@@ -873,7 +827,8 @@ retry:
        if (hb1 != hb2)
                spin_unlock(&hb2->lock);
 out:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &key2);
+       put_futex_key(fshared, &key1);
 
        return ret;
 }
@@ -882,19 +837,16 @@ out:
  * Requeue all waiters hashed on one physical page to another
  * physical page.
  */
-static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
-                        u32 __user *uaddr2,
+static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
                         int nr_wake, int nr_requeue, u32 *cmpval)
 {
-       union futex_key key1, key2;
+       union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        struct futex_hash_bucket *hb1, *hb2;
        struct plist_head *head1;
        struct futex_q *this, *next;
        int ret, drop_count = 0;
 
  retry:
-       futex_lock_mm(fshared);
-
        ret = get_futex_key(uaddr1, fshared, &key1);
        if (unlikely(ret != 0))
                goto out;
@@ -917,12 +869,6 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
                        if (hb1 != hb2)
                                spin_unlock(&hb2->lock);
 
-                       /*
-                        * If we would have faulted, release mmap_sem, fault
-                        * it in and start all over again.
-                        */
-                       futex_unlock_mm(fshared);
-
                        ret = get_user(curval, uaddr1);
 
                        if (!ret)
@@ -974,7 +920,8 @@ out_unlock:
                drop_futex_key_refs(&key1);
 
 out:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &key2);
+       put_futex_key(fshared, &key1);
        return ret;
 }
 
@@ -1096,8 +1043,7 @@ static void unqueue_me_pi(struct futex_q *q)
  * private futexes.
  */
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-                               struct task_struct *newowner,
-                               struct rw_semaphore *fshared)
+                               struct task_struct *newowner, int fshared)
 {
        u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
        struct futex_pi_state *pi_state = q->pi_state;
@@ -1176,7 +1122,7 @@ retry:
 handle_fault:
        spin_unlock(q->lock_ptr);
 
-       ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
+       ret = futex_handle_fault((unsigned long)uaddr, attempt++);
 
        spin_lock(q->lock_ptr);
 
@@ -1200,7 +1146,7 @@ handle_fault:
 
 static long futex_wait_restart(struct restart_block *restart);
 
-static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
+static int futex_wait(u32 __user *uaddr, int fshared,
                      u32 val, ktime_t *abs_time, u32 bitset)
 {
        struct task_struct *curr = current;
@@ -1218,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
        q.pi_state = NULL;
        q.bitset = bitset;
  retry:
-       futex_lock_mm(fshared);
-
+       q.key = FUTEX_KEY_INIT;
        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
                goto out_release_sem;
@@ -1251,12 +1196,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
        if (unlikely(ret)) {
                queue_unlock(&q, hb);
 
-               /*
-                * If we would have faulted, release mmap_sem, fault it in and
-                * start all over again.
-                */
-               futex_unlock_mm(fshared);
-
                ret = get_user(uval, uaddr);
 
                if (!ret)
@@ -1271,12 +1210,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
        queue_me(&q, hb);
 
        /*
-        * Now the futex is queued and we have checked the data, we
-        * don't want to hold mmap_sem while we sleep.
-        */
-       futex_unlock_mm(fshared);
-
-       /*
         * There might have been scheduling since the queue_me(), as we
         * cannot hold a spinlock across the get_user() in case it
         * faults, and we cannot just set TASK_INTERRUPTIBLE state when
@@ -1363,7 +1296,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
        queue_unlock(&q, hb);
 
  out_release_sem:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &q.key);
        return ret;
 }
 
@@ -1371,13 +1304,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 static long futex_wait_restart(struct restart_block *restart)
 {
        u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
-       struct rw_semaphore *fshared = NULL;
+       int fshared = 0;
        ktime_t t;
 
        t.tv64 = restart->futex.time;
        restart->fn = do_no_restart_syscall;
        if (restart->futex.flags & FLAGS_SHARED)
-               fshared = &current->mm->mmap_sem;
+               fshared = 1;
        return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
                                restart->futex.bitset);
 }
@@ -1389,7 +1322,7 @@ static long futex_wait_restart(struct restart_block *restart)
  * if there are waiters then it will block, it does PI, etc. (Due to
  * races the kernel might see a 0 value of the futex too.)
  */
-static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
+static int futex_lock_pi(u32 __user *uaddr, int fshared,
                         int detect, ktime_t *time, int trylock)
 {
        struct hrtimer_sleeper timeout, *to = NULL;
@@ -1412,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 
        q.pi_state = NULL;
  retry:
-       futex_lock_mm(fshared);
-
+       q.key = FUTEX_KEY_INIT;
        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
                goto out_release_sem;
@@ -1502,7 +1434,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                         * exit to complete.
                         */
                        queue_unlock(&q, hb);
-                       futex_unlock_mm(fshared);
                        cond_resched();
                        goto retry;
 
@@ -1534,12 +1465,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
         */
        queue_me(&q, hb);
 
-       /*
-        * Now the futex is queued and we have checked the data, we
-        * don't want to hold mmap_sem while we sleep.
-        */
-       futex_unlock_mm(fshared);
-
        WARN_ON(!q.pi_state);
        /*
         * Block on the PI mutex:
@@ -1552,7 +1477,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                ret = ret ? 0 : -EWOULDBLOCK;
        }
 
-       futex_lock_mm(fshared);
        spin_lock(q.lock_ptr);
 
        if (!ret) {
@@ -1618,7 +1542,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 
        /* Unqueue and drop the lock */
        unqueue_me_pi(&q);
-       futex_unlock_mm(fshared);
 
        if (to)
                destroy_hrtimer_on_stack(&to->timer);
@@ -1628,7 +1551,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
        queue_unlock(&q, hb);
 
  out_release_sem:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &q.key);
        if (to)
                destroy_hrtimer_on_stack(&to->timer);
        return ret;
@@ -1645,15 +1568,12 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
        queue_unlock(&q, hb);
 
        if (attempt++) {
-               ret = futex_handle_fault((unsigned long)uaddr, fshared,
-                                        attempt);
+               ret = futex_handle_fault((unsigned long)uaddr, attempt);
                if (ret)
                        goto out_release_sem;
                goto retry_unlocked;
        }
 
-       futex_unlock_mm(fshared);
-
        ret = get_user(uval, uaddr);
        if (!ret && (uval != -EFAULT))
                goto retry;
@@ -1668,13 +1588,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
  * This is the in-kernel slowpath: we look up the PI state (if any),
  * and do the rt-mutex unlock.
  */
-static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
+static int futex_unlock_pi(u32 __user *uaddr, int fshared)
 {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
        u32 uval;
        struct plist_head *head;
-       union futex_key key;
+       union futex_key key = FUTEX_KEY_INIT;
        int ret, attempt = 0;
 
 retry:
@@ -1685,10 +1605,6 @@ retry:
         */
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
                return -EPERM;
-       /*
-        * First take all the futex related locks:
-        */
-       futex_lock_mm(fshared);
 
        ret = get_futex_key(uaddr, fshared, &key);
        if (unlikely(ret != 0))
@@ -1747,7 +1663,7 @@ retry_unlocked:
 out_unlock:
        spin_unlock(&hb->lock);
 out:
-       futex_unlock_mm(fshared);
+       put_futex_key(fshared, &key);
 
        return ret;
 
@@ -1763,16 +1679,13 @@ pi_faulted:
        spin_unlock(&hb->lock);
 
        if (attempt++) {
-               ret = futex_handle_fault((unsigned long)uaddr, fshared,
-                                        attempt);
+               ret = futex_handle_fault((unsigned long)uaddr, attempt);
                if (ret)
                        goto out;
                uval = 0;
                goto retry_unlocked;
        }
 
-       futex_unlock_mm(fshared);
-
        ret = get_user(uval, uaddr);
        if (!ret && (uval != -EFAULT))
                goto retry;
@@ -1898,8 +1811,7 @@ retry:
                 * PI futexes happens in exit_pi_state():
                 */
                if (!pi && (uval & FUTEX_WAITERS))
-                       futex_wake(uaddr, &curr->mm->mmap_sem, 1,
-                                  FUTEX_BITSET_MATCH_ANY);
+                       futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
        }
        return 0;
 }
@@ -1995,10 +1907,10 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 {
        int ret = -ENOSYS;
        int cmd = op & FUTEX_CMD_MASK;
-       struct rw_semaphore *fshared = NULL;
+       int fshared = 0;
 
        if (!(op & FUTEX_PRIVATE_FLAG))
-               fshared = &current->mm->mmap_sem;
+               fshared = 1;
 
        switch (cmd) {
        case FUTEX_WAIT:
index 46a4041..e4bdda8 100644 (file)
@@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
 #ifdef CONFIG_LOCK_STAT
 static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
 
-static int lock_contention_point(struct lock_class *class, unsigned long ip)
+static int lock_point(unsigned long points[], unsigned long ip)
 {
        int i;
 
-       for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
-               if (class->contention_point[i] == 0) {
-                       class->contention_point[i] = ip;
+       for (i = 0; i < LOCKSTAT_POINTS; i++) {
+               if (points[i] == 0) {
+                       points[i] = ip;
                        break;
                }
-               if (class->contention_point[i] == ip)
+               if (points[i] == ip)
                        break;
        }
 
@@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class)
                for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
                        stats.contention_point[i] += pcs->contention_point[i];
 
+               for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
+                       stats.contending_point[i] += pcs->contending_point[i];
+
                lock_time_add(&pcs->read_waittime, &stats.read_waittime);
                lock_time_add(&pcs->write_waittime, &stats.write_waittime);
 
@@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class)
                memset(cpu_stats, 0, sizeof(struct lock_class_stats));
        }
        memset(class->contention_point, 0, sizeof(class->contention_point));
+       memset(class->contending_point, 0, sizeof(class->contending_point));
 }
 
 static struct lock_class_stats *get_lock_stats(struct lock_class *class)
@@ -2999,7 +3003,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
        struct held_lock *hlock, *prev_hlock;
        struct lock_class_stats *stats;
        unsigned int depth;
-       int i, point;
+       int i, contention_point, contending_point;
 
        depth = curr->lockdep_depth;
        if (DEBUG_LOCKS_WARN_ON(!depth))
@@ -3023,18 +3027,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
 found_it:
        hlock->waittime_stamp = sched_clock();
 
-       point = lock_contention_point(hlock_class(hlock), ip);
+       contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
+       contending_point = lock_point(hlock_class(hlock)->contending_point,
+                                     lock->ip);
 
        stats = get_lock_stats(hlock_class(hlock));
-       if (point < ARRAY_SIZE(stats->contention_point))
-               stats->contention_point[point]++;
+       if (contention_point < LOCKSTAT_POINTS)
+               stats->contention_point[contention_point]++;
+       if (contending_point < LOCKSTAT_POINTS)
+               stats->contending_point[contending_point]++;
        if (lock->cpu != smp_processor_id())
                stats->bounces[bounce_contended + !!hlock->read]++;
        put_lock_stats(stats);
 }
 
 static void
-__lock_acquired(struct lockdep_map *lock)
+__lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
        struct task_struct *curr = current;
        struct held_lock *hlock, *prev_hlock;
@@ -3083,6 +3091,7 @@ found_it:
        put_lock_stats(stats);
 
        lock->cpu = cpu;
+       lock->ip = ip;
 }
 
 void lock_contended(struct lockdep_map *lock, unsigned long ip)
@@ -3104,7 +3113,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 }
 EXPORT_SYMBOL_GPL(lock_contended);
 
-void lock_acquired(struct lockdep_map *lock)
+void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
        unsigned long flags;
 
@@ -3117,7 +3126,7 @@ void lock_acquired(struct lockdep_map *lock)
        raw_local_irq_save(flags);
        check_flags(flags);
        current->lockdep_recursion = 1;
-       __lock_acquired(lock);
+       __lock_acquired(lock, ip);
        current->lockdep_recursion = 0;
        raw_local_irq_restore(flags);
 }
index 20dbcbf..13716b8 100644 (file)
@@ -470,11 +470,12 @@ static void seq_line(struct seq_file *m, char c, int offset, int length)
 
 static void snprint_time(char *buf, size_t bufsiz, s64 nr)
 {
-       unsigned long rem;
+       s64 div;
+       s32 rem;
 
        nr += 5; /* for display rounding */
-       rem = do_div(nr, 1000); /* XXX: do_div_signed */
-       snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10);
+       div = div_s64_rem(nr, 1000, &rem);
+       snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10);
 }
 
 static void seq_time(struct seq_file *m, s64 time)
@@ -556,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
        if (stats->read_holdtime.nr)
                namelen += 2;
 
-       for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
+       for (i = 0; i < LOCKSTAT_POINTS; i++) {
                char sym[KSYM_SYMBOL_LEN];
                char ip[32];
 
@@ -573,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
                                stats->contention_point[i],
                                ip, sym);
        }
+       for (i = 0; i < LOCKSTAT_POINTS; i++) {
+               char sym[KSYM_SYMBOL_LEN];
+               char ip[32];
+
+               if (class->contending_point[i] == 0)
+                       break;
+
+               if (!i)
+                       seq_line(m, '-', 40-namelen, namelen);
+
+               sprint_symbol(sym, class->contending_point[i]);
+               snprintf(ip, sizeof(ip), "[<%p>]",
+                               (void *)class->contending_point[i]);
+               seq_printf(m, "%40s %14lu %29s %s\n", name,
+                               stats->contending_point[i],
+                               ip, sym);
+       }
        if (i) {
                seq_puts(m, "\n");
                seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
@@ -582,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 
 static void seq_header(struct seq_file *m)
 {
-       seq_printf(m, "lock_stat version 0.2\n");
+       seq_printf(m, "lock_stat version 0.3\n");
        seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
        seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
                        "%14s %14s\n",
index 12c779d..4f45d4b 100644 (file)
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init);
  * We also put the fastpath first in the kernel image, to make sure the
  * branch is predicted by the CPU as default-untaken.
  */
-static void noinline __sched
+static __used noinline void __sched
 __mutex_lock_slowpath(atomic_t *lock_count);
 
 /***
@@ -96,7 +96,7 @@ void inline __sched mutex_lock(struct mutex *lock)
 EXPORT_SYMBOL(mutex_lock);
 #endif
 
-static noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
+static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
 
 /***
  * mutex_unlock - release the mutex
@@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
        }
 
 done:
-       lock_acquired(&lock->dep_map);
+       lock_acquired(&lock->dep_map, ip);
        /* got the lock - rejoice! */
        mutex_remove_waiter(lock, &waiter, task_thread_info(task));
        debug_mutex_set_owner(lock, task_thread_info(task));
@@ -268,7 +268,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
 /*
  * Release the lock, slowpath:
  */
-static noinline void
+static __used noinline void
 __mutex_unlock_slowpath(atomic_t *lock_count)
 {
        __mutex_unlock_common_slowpath(lock_count, 1);
@@ -313,7 +313,7 @@ int __sched mutex_lock_killable(struct mutex *lock)
 }
 EXPORT_SYMBOL(mutex_lock_killable);
 
-static noinline void __sched
+static __used noinline void __sched
 __mutex_lock_slowpath(atomic_t *lock_count)
 {
        struct mutex *lock = container_of(lock_count, struct mutex, count);
index 4282c0a..61d5aa5 100644 (file)
@@ -82,6 +82,14 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
 
        while (nb && nr_to_call) {
                next_nb = rcu_dereference(nb->next);
+
+#ifdef CONFIG_DEBUG_NOTIFIERS
+               if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
+                       WARN(1, "Invalid notifier called!");
+                       nb = next_nb;
+                       continue;
+               }
+#endif
                ret = nb->notifier_call(nb, val, v);
 
                if (nr_calls)
index 4e5288a..157de3a 100644 (file)
@@ -58,21 +58,21 @@ void thread_group_cputime(
        struct task_struct *tsk,
        struct task_cputime *times)
 {
-       struct signal_struct *sig;
+       struct task_cputime *totals, *tot;
        int i;
-       struct task_cputime *tot;
 
-       sig = tsk->signal;
-       if (unlikely(!sig) || !sig->cputime.totals) {
+       totals = tsk->signal->cputime.totals;
+       if (!totals) {
                times->utime = tsk->utime;
                times->stime = tsk->stime;
                times->sum_exec_runtime = tsk->se.sum_exec_runtime;
                return;
        }
+
        times->stime = times->utime = cputime_zero;
        times->sum_exec_runtime = 0;
        for_each_possible_cpu(i) {
-               tot = per_cpu_ptr(tsk->signal->cputime.totals, i);
+               tot = per_cpu_ptr(totals, i);
                times->utime = cputime_add(times->utime, tot->utime);
                times->stime = cputime_add(times->stime, tot->stime);
                times->sum_exec_runtime += tot->sum_exec_runtime;
index 37f72e5..e503a00 100644 (file)
@@ -191,7 +191,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
 
        /* OK, time to rat on our buddy... */
 
-       printk(KERN_ERR "RCU detected CPU stalls:");
+       printk(KERN_ERR "INFO: RCU detected CPU stalls:");
        for_each_possible_cpu(cpu) {
                if (cpu_isset(cpu, rcp->cpumask))
                        printk(" %d", cpu);
@@ -204,7 +204,7 @@ static void print_cpu_stall(struct rcu_ctrlblk *rcp)
 {
        unsigned long flags;
 
-       printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+       printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
                        smp_processor_id(), jiffies,
                        jiffies - rcp->gp_start);
        dump_stack();
index e4bb1dd..3e70963 100644 (file)
@@ -4203,7 +4203,6 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 
        if (p == rq->idle) {
                p->stime = cputime_add(p->stime, steal);
-               account_group_system_time(p, steal);
                if (atomic_read(&rq->nr_iowait) > 0)
                        cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
                else
@@ -4339,7 +4338,7 @@ void __kprobes sub_preempt_count(int val)
        /*
         * Underflow?
         */
-       if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+       if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
                return;
        /*
         * Is the spinlock portion underflowing?
index dc0b3be..1ab790c 100644 (file)
@@ -164,7 +164,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
 /*
  * Zero means infinite timeout - no checking done:
  */
-unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
 
 unsigned long __read_mostly sysctl_hung_task_warnings = 10;
 
index 31deba8..5fc3a0c 100644 (file)
@@ -858,8 +858,8 @@ void do_sys_times(struct tms *tms)
        struct task_cputime cputime;
        cputime_t cutime, cstime;
 
-       spin_lock_irq(&current->sighand->siglock);
        thread_group_cputime(current, &cputime);
+       spin_lock_irq(&current->sighand->siglock);
        cutime = current->signal->cutime;
        cstime = current->signal->cstime;
        spin_unlock_irq(&current->sighand->siglock);
index b0f239e..1e3fd3e 100644 (file)
@@ -545,6 +545,16 @@ config DEBUG_SG
 
          If unsure, say N.
 
+config DEBUG_NOTIFIERS
+       bool "Debug notifier call chains"
+       depends on DEBUG_KERNEL
+       help
+         Enable this to turn on sanity checking for notifier call chains.
+         This is most useful for kernel developers to make sure that
+         modules properly unregister themselves from notifier chains.
+         This is a relatively cheap check but if you care about maximum
+         performance, say N.
+
 config FRAME_POINTER
        bool "Compile the kernel with frame pointers"
        depends on DEBUG_KERNEL && \
index 164951c..fc031d6 100644 (file)
@@ -3049,3 +3049,18 @@ void print_vma_addr(char *prefix, unsigned long ip)
        }
        up_read(&current->mm->mmap_sem);
 }
+
+#ifdef CONFIG_PROVE_LOCKING
+void might_fault(void)
+{
+       might_sleep();
+       /*
+        * it would be nicer only to annotate paths which are not under
+        * pagefault_disable, however that requires a larger audit and
+        * providing helpers like get_user_atomic.
+        */
+       if (!in_atomic() && current->mm)
+               might_lock_read(&current->mm->mmap_sem);
+}
+EXPORT_SYMBOL(might_fault);
+#endif