nvmap: page pools: fix lockdep splat
Colin Cross [Wed, 13 Aug 2014 02:36:04 +0000 (19:36 -0700)]
lockdep complains about the alloc_page call made under the pool lock
in nvmap_page_pool_init, because the same lock is also taken during
reclaim in nvmap_page_pool_shrink.  Rewrite nvmap_page_pool_init to
perform the allocation in PENDING_PAGES_SIZE chunks outside the lock.
Also replace the global pending_pages array with a function-local
static array in nvmap_pp_do_background_zero_pages and a kcalloc'd
array in the init path, so that two threads can no longer race on
the same buffer.
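
A minimal sketch of what the reworked init path could look like (the
nvmap_page_pool_init hunk itself is not part of the diff below; the
helper name nvmap_pp_fill_chunks and its exact control flow are
illustrative assumptions, modeled on the batch-fill pattern visible in
nvmap_pp_do_background_zero_pages):

    /* Hypothetical sketch, not the actual init code: allocate a batch
     * of pages with pool->lock dropped, then take the lock only long
     * enough to push the batch into the pool. */
    static int nvmap_pp_fill_chunks(struct nvmap_page_pool *pool, u32 total)
    {
            struct page **pages;
            u32 done = 0;

            /* kcalloc'd scratch array instead of the old global one */
            pages = kcalloc(PENDING_PAGES_SIZE, sizeof(*pages), GFP_KERNEL);
            if (!pages)
                    return -ENOMEM;

            while (done < total) {
                    int i, ret;
                    u32 nr = min_t(u32, PENDING_PAGES_SIZE, total - done);

                    /* alloc_page() runs without pool->lock held, so the
                     * shrinker may safely take the lock during reclaim */
                    for (i = 0; i < nr; i++) {
                            pages[i] = alloc_page(GFP_KERNEL);
                            if (!pages[i])
                                    break;
                    }

                    mutex_lock(&pool->lock);
                    ret = __nvmap_page_pool_fill_lots_locked(pool, pages, i);
                    mutex_unlock(&pool->lock);

                    /* free whatever the pool did not accept */
                    for (; ret < i; ret++)
                            __free_page(pages[ret]);

                    done += i;
                    if (i < nr)
                            break;
            }

            kfree(pages);
            return 0;
    }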

[   57.734407] =================================
[   57.738786] [ INFO: inconsistent lock state ]
[   57.743198] 3.10.40-ge3b2801-dirty #145 Tainted: G        W
[   57.749135] ---------------------------------
[   57.753521] inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage.
[   57.760082] kswapd0/41 [HC0[0]:SC0[0]:HE1:SE1] takes:
[   57.765159]  (&pool->lock#2){+.+.?.}, at: [<ffffffc000404d28>] nvmap_page_pool_shrink+0x48/0x9c
[   57.773953] {RECLAIM_FS-ON-W} state was registered at:
[   57.779118]   [<ffffffc000106964>] mark_lock+0x1d0/0x7a8
[   57.784938]   [<ffffffc000107004>] mark_held_locks+0xc8/0x184
[   57.791543]   [<ffffffc000107bd0>] lockdep_trace_alloc+0xb4/0xf8
[   57.797594]   [<ffffffc00015ce24>] __alloc_pages_nodemask+0x98/0x7f8
[   57.803987]   [<ffffffc0004051d8>] nvmap_page_pool_init+0x19c/0x220
[   57.810287]   [<ffffffc0003fbf20>] nvmap_probe+0x9c/0x78c
[   57.815713]   [<ffffffc00049c588>] platform_drv_probe+0x18/0x24
[   57.821662]   [<ffffffc00049aadc>] driver_probe_device+0xa8/0x3a8
[   57.827783]   [<ffffffc00049aed0>] __driver_attach+0xa0/0xa8
[   57.833466]   [<ffffffc000498d64>] bus_for_each_dev+0x58/0x9c
[   57.839238]   [<ffffffc00049a9d0>] driver_attach+0x1c/0x28
[   57.844746]   [<ffffffc000499828>] bus_add_driver+0x1d8/0x294
[   57.850517]   [<ffffffc00049ba10>] driver_register+0x68/0x174
[   57.856289]   [<ffffffc00049cfe0>] platform_driver_register+0x54/0x60
[   57.862759]   [<ffffffc000d61354>] nvmap_init_driver+0x24/0x40
[   57.868617]   [<ffffffc000d328d4>] do_one_initcall+0xac/0x164
[   57.874387]   [<ffffffc000d32af8>] kernel_init_freeable+0x158/0x1f8
[   57.880678]   [<ffffffc0009e7d40>] kernel_init+0x10/0x158
[   57.886103]   [<ffffffc000084cfc>] ret_from_fork+0xc/0x1c
[   57.891526] irq event stamp: 803
[   57.894763] hardirqs last  enabled at (803): [<ffffffc0009fc208>] _raw_spin_unlock_irqrestore+0x64/0x94
[   57.904211] hardirqs last disabled at (802): [<ffffffc0009fbfa8>] _raw_spin_lock_irqsave+0x24/0x9c
[   57.913202] softirqs last  enabled at (0): [<ffffffc0000a508c>] copy_process.part.55+0x448/0x1150
[   57.922108] softirqs last disabled at (0): [<          (null)>]           (null)
[   57.929530]
[   57.929530] other info that might help us debug this:
[   57.936074]  Possible unsafe locking scenario:
[   57.936074]
[   57.942008]        CPU0
[   57.944456]        ----
[   57.946905]   lock(&pool->lock#2);
[   57.950338]   <Interrupt>
[   57.952960]     lock(&pool->lock#2);
[   57.956565]
[   57.956565]  *** DEADLOCK ***
[   57.956565]
[   57.962502] 1 lock held by kswapd0/41:
[   57.966257]  #0:  (shrinker_rwsem){++++..}, at: [<ffffffc0001651cc>] shrink_slab+0x54/0x3b0
[   57.974677]
[   57.974677] stack backtrace:
[   57.979063] CPU: 0 PID: 41 Comm: kswapd0 Tainted: G        W    3.10.40-ge3b2801-dirty #145
[   57.987434] Call trace:
[   57.989888] [<ffffffc0000887e4>] dump_backtrace+0x0/0x188
[   57.995300] [<ffffffc00008897c>] show_stack+0x10/0x1c
[   58.000363] [<ffffffc0009f3490>] dump_stack+0x1c/0x28
[   58.005428] [<ffffffc0009f0d6c>] print_usage_bug.part.37+0x28c/0x2a8
[   58.011799] [<ffffffc0001068f0>] mark_lock+0x15c/0x7a8
[   58.016951] [<ffffffc000109f50>] __lock_acquire+0x7c0/0xd24
[   58.022536] [<ffffffc00010ada8>] lock_acquire+0xa8/0x148
[   58.027862] [<ffffffc0009f870c>] mutex_lock_nested+0x80/0x3e0
[   58.033623] [<ffffffc000404d24>] nvmap_page_pool_shrink+0x44/0x9c
[   58.039731] [<ffffffc000165364>] shrink_slab+0x1ec/0x3b0
[   58.045056] [<ffffffc000168554>] balance_pgdat+0x4d0/0x5d4
[   58.050555] [<ffffffc0001687d0>] kswapd+0x178/0x45c
[   58.055448] [<ffffffc0000d0fa0>] kthread+0xd0/0xdc

Change-Id: I41e908da9c51f353300f32c50354b4f48b2424c5
Signed-off-by: Colin Cross <ccross@android.com>
Signed-off-by: Krishna Reddy <vdumpa@nvidia.com>
Reviewed-on: http://git-master/r/664675
GVS: Gerrit_Virtual_Submit
Reviewed-on: http://git-master/r/736429
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Tested-by: Alex Waterman <alexw@nvidia.com>

diff --git a/drivers/video/tegra/nvmap/nvmap_pp.c b/drivers/video/tegra/nvmap/nvmap_pp.c
index 599b76a..af34da6 100644
--- a/drivers/video/tegra/nvmap/nvmap_pp.c
+++ b/drivers/video/tegra/nvmap/nvmap_pp.c
@@ -41,7 +41,6 @@ static int pool_size;
 
 static struct task_struct *background_allocator;
 static DECLARE_WAIT_QUEUE_HEAD(nvmap_bg_wait);
-static struct page *pending_pages[PENDING_PAGES_SIZE];
 
 #ifdef CONFIG_NVMAP_PAGE_POOL_DEBUG
 static inline void __pp_dbg_var_add(u64 *dbg_var, u32 nr)
@@ -117,28 +116,34 @@ static void nvmap_pp_do_background_zero_pages(struct nvmap_page_pool *pool)
        struct page *page;
        int ret;
 
+       /*
+        * Statically declared array of pages to be zeroed in a batch,
+        * local to this thread but too big for the stack.
+        */
+       static struct page *pending_zero_pages[PENDING_PAGES_SIZE];
+
        mutex_lock(&pool->lock);
        for (i = 0; i < PENDING_PAGES_SIZE; i++) {
                page = get_zero_list_page(pool);
                if (page == NULL)
                        break;
-               pending_pages[i] = page;
+               pending_zero_pages[i] = page;
        }
        mutex_unlock(&pool->lock);
 
-       ret = nvmap_pp_zero_pages(pending_pages, i);
+       ret = nvmap_pp_zero_pages(pending_zero_pages, i);
        if (ret < 0) {
                ret = 0;
                goto out;
        }
 
        mutex_lock(&pool->lock);
-       ret = __nvmap_page_pool_fill_lots_locked(pool, pending_pages, i);
+       ret = __nvmap_page_pool_fill_lots_locked(pool, pending_zero_pages, i);
        mutex_unlock(&pool->lock);
 
 out:
        for (; ret < i; ret++)
-               __free_page(pending_pages[ret]);
+               __free_page(pending_zero_pages[ret]);
 }
 
 /*
@@ -212,11 +217,9 @@ static struct page *nvmap_page_pool_alloc_locked(struct nvmap_page_pool *pool,
  * Alloc a bunch of pages from the page pool. This will alloc as many as it can
  * and return the number of pages allocated. Pages are placed into the passed
  * array in a linear fashion starting from index 0.
- *
- * You must lock the page pool before using this.
  */
-static int __nvmap_page_pool_alloc_lots_locked(struct nvmap_page_pool *pool,
-                                       struct page **pages, u32 nr)
+int nvmap_page_pool_alloc_lots(struct nvmap_page_pool *pool,
+                               struct page **pages, u32 nr)
 {
        u32 real_nr;
        u32 ind = 0;
@@ -224,6 +227,8 @@ static int __nvmap_page_pool_alloc_lots_locked(struct nvmap_page_pool *pool,
        if (!enable_pp)
                return 0;
 
+       mutex_lock(&pool->lock);
+
        real_nr = min_t(u32, nr, pool->count);
 
        while (real_nr--) {
@@ -237,6 +242,7 @@ static int __nvmap_page_pool_alloc_lots_locked(struct nvmap_page_pool *pool,
                        BUG_ON(atomic_read(&page->_count) != 1);
                }
        }
+       mutex_unlock(&pool->lock);
 
        pp_alloc_add(pool, ind);
        pp_hit_add(pool, ind);
@@ -245,18 +251,6 @@ static int __nvmap_page_pool_alloc_lots_locked(struct nvmap_page_pool *pool,
        return ind;
 }
 
-int nvmap_page_pool_alloc_lots(struct nvmap_page_pool *pool,
-                               struct page **pages, u32 nr)
-{
-       int ret;
-
-       mutex_lock(&pool->lock);
-       ret = __nvmap_page_pool_alloc_lots_locked(pool, pages, nr);
-       mutex_unlock(&pool->lock);
-
-       return ret;
-}
-
 /*
  * Fill a bunch of pages into the page pool. This will fill as many as it can
  * and return the number of pages filled. Pages are used from the start of the