slub: free slabs without holding locks
Christoph Lameter [Tue, 9 Aug 2011 21:12:22 +0000 (16:12 -0500)]
There are two situations in which slub holds a lock while releasing
pages:

A. During kmem_cache_shrink()
B. During kmem_cache_close()

For A build a list while holding the lock and then release the pages
later. In case of B we are the last remaining user of the slab so
there is no need to take the listlock.

After this patch all calls to the page allocator to free pages are
done without holding any spinlocks. kmem_cache_destroy() will still
hold the slub_lock semaphore.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>

mm/slub.c

index 9f662d7..30c4558 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2970,13 +2970,13 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
 
 /*
  * Attempt to free all partial slabs on a node.
+ * This is called from kmem_cache_close(). We must be the last thread
+ * using the cache and therefore we do not need to lock anymore.
  */
 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 {
-       unsigned long flags;
        struct page *page, *h;
 
-       spin_lock_irqsave(&n->list_lock, flags);
        list_for_each_entry_safe(page, h, &n->partial, lru) {
                if (!page->inuse) {
                        remove_partial(n, page);
@@ -2986,7 +2986,6 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
                                "Objects remaining on kmem_cache_close()");
                }
        }
-       spin_unlock_irqrestore(&n->list_lock, flags);
 }
 
 /*
@@ -3020,6 +3019,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
        s->refcount--;
        if (!s->refcount) {
                list_del(&s->list);
+               up_write(&slub_lock);
                if (kmem_cache_close(s)) {
                        printk(KERN_ERR "SLUB %s: %s called for cache that "
                                "still has objects.\n", s->name, __func__);
@@ -3028,8 +3028,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
                if (s->flags & SLAB_DESTROY_BY_RCU)
                        rcu_barrier();
                sysfs_slab_remove(s);
-       }
-       up_write(&slub_lock);
+       } else
+               up_write(&slub_lock);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
@@ -3347,23 +3347,23 @@ int kmem_cache_shrink(struct kmem_cache *s)
                 * list_lock. page->inuse here is the upper limit.
                 */
                list_for_each_entry_safe(page, t, &n->partial, lru) {
-                       if (!page->inuse) {
-                               remove_partial(n, page);
-                               discard_slab(s, page);
-                       } else {
-                               list_move(&page->lru,
-                               slabs_by_inuse + page->inuse);
-                       }
+                       list_move(&page->lru, slabs_by_inuse + page->inuse);
+                       if (!page->inuse)
+                               n->nr_partial--;
                }
 
                /*
                 * Rebuild the partial list with the slabs filled up most
                 * first and the least used slabs at the end.
                 */
-               for (i = objects - 1; i >= 0; i--)
+               for (i = objects - 1; i > 0; i--)
                        list_splice(slabs_by_inuse + i, n->partial.prev);
 
                spin_unlock_irqrestore(&n->list_lock, flags);
+
+               /* Release empty slabs */
+               list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
+                       discard_slab(s, page);
        }
 
        kfree(slabs_by_inuse);