x86: fix deadlock, make pgd_lock irq-safe
Ingo Molnar [Wed, 6 Feb 2008 21:39:45 +0000 (22:39 +0100)]
Lockdep just caught this one:

=================================
[ INFO: inconsistent lock state ]
2.6.24 #38
---------------------------------
inconsistent {in-softirq-W} -> {softirq-on-W} usage.
swapper/1 [HC0[0]:SC0[0]:HE1:SE1] takes:
 (pgd_lock){-+..}, at: [<ffffffff8022a9ea>] mm_init+0x1da/0x250
{in-softirq-W} state was registered at:
  [<ffffffffffffffff>] 0xffffffffffffffff
irq event stamp: 394559
hardirqs last  enabled at (394559): [<ffffffff80267f0a>] get_page_from_freelist+0x30a/0x4c0
hardirqs last disabled at (394558): [<ffffffff80267d25>] get_page_from_freelist+0x125/0x4c0
softirqs last  enabled at (393952): [<ffffffff80232f8e>] __do_softirq+0xce/0xe0
softirqs last disabled at (393945): [<ffffffff8020c57c>] call_softirq+0x1c/0x30

other info that might help us debug this:
no locks held by swapper/1.

stack backtrace:
Pid: 1, comm: swapper Not tainted 2.6.24 #38

Call Trace:
 [<ffffffff8024e1fb>] print_usage_bug+0x18b/0x190
 [<ffffffff8024f55d>] mark_lock+0x53d/0x560
 [<ffffffff8024fffa>] __lock_acquire+0x3ca/0xed0
 [<ffffffff80250ba8>] lock_acquire+0xa8/0xe0
 [<ffffffff8022a9ea>] ? mm_init+0x1da/0x250
 [<ffffffff809bcd10>] _spin_lock+0x30/0x70
 [<ffffffff8022a9ea>] mm_init+0x1da/0x250
 [<ffffffff8022aa99>] mm_alloc+0x39/0x50
 [<ffffffff8028b95a>] bprm_mm_init+0x2a/0x1a0
 [<ffffffff8028d12b>] do_execve+0x7b/0x220
 [<ffffffff80209776>] sys_execve+0x46/0x70
 [<ffffffff8020c214>] kernel_execve+0x64/0xd0
 [<ffffffff8020901e>] ? _stext+0x1e/0x20
 [<ffffffff802090ba>] init_post+0x9a/0xf0
 [<ffffffff809bc5f6>] ? trace_hardirqs_on_thunk+0x35/0x3a
 [<ffffffff8024f75a>] ? trace_hardirqs_on+0xba/0xd0
 [<ffffffff8020c1a8>] ? child_rip+0xa/0x12
 [<ffffffff8020bcbc>] ? restore_args+0x0/0x44
 [<ffffffff8020c19e>] ? child_rip+0x0/0x12

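It turns out that pgd_lock has been used on 64-bit x86 in an irq-unsafe
way for almost two years, since commit 8c914cb704a11460e.

The lock is also taken in softirq context (the {in-softirq-W} state in
the report above), so taking it with softirqs enabled can deadlock if a
softirq interrupts the holder on the same CPU. Fix this by converting the
pgd_lock users in vmalloc_sync_all(), pgd_list_add() and pgd_list_del()
to spin_lock_irqsave()/spin_unlock_irqrestore().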

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

arch/x86/mm/fault.c
include/asm-x86/pgalloc_64.h

index d8ed400..621afb6 100644 (file)
@@ -958,11 +958,12 @@ void vmalloc_sync_all(void)
        for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
                if (!test_bit(pgd_index(address), insync)) {
                        const pgd_t *pgd_ref = pgd_offset_k(address);
+                       unsigned long flags;
                        struct page *page;
 
                        if (pgd_none(*pgd_ref))
                                continue;
-                       spin_lock(&pgd_lock);
+                       spin_lock_irqsave(&pgd_lock, flags);
                        list_for_each_entry(page, &pgd_list, lru) {
                                pgd_t *pgd;
                                pgd = (pgd_t *)page_address(page) + pgd_index(address);
@@ -971,7 +972,7 @@ void vmalloc_sync_all(void)
                                else
                                        BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
                        }
-                       spin_unlock(&pgd_lock);
+                       spin_unlock_irqrestore(&pgd_lock, flags);
                        set_bit(pgd_index(address), insync);
                }
                if (address == start)
index 315314c..4f6220d 100644 (file)
@@ -42,19 +42,21 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 static inline void pgd_list_add(pgd_t *pgd)
 {
        struct page *page = virt_to_page(pgd);
+       unsigned long flags;
 
-       spin_lock(&pgd_lock);
+       spin_lock_irqsave(&pgd_lock, flags);
        list_add(&page->lru, &pgd_list);
-       spin_unlock(&pgd_lock);
+       spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
        struct page *page = virt_to_page(pgd);
+       unsigned long flags;
 
-       spin_lock(&pgd_lock);
+       spin_lock_irqsave(&pgd_lock, flags);
        list_del(&page->lru);
-       spin_unlock(&pgd_lock);
+       spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)