cgroups: add per-thread subsystem callbacks
[linux-2.6.git] / mm / dmapool.c
index b5034dc..03bf3bb 100644 (file)
@@ -1,46 +1,72 @@
+/*
+ * DMA Pool allocator
+ *
+ * Copyright 2001 David Brownell
+ * Copyright 2007 Intel Corporation
+ *   Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * This software may be redistributed and/or modified under the terms of
+ * the GNU General Public License ("GPL") version 2 as published by the
+ * Free Software Foundation.
+ *
+ * This allocator returns small blocks of a given size which are DMA-able by
+ * the given device.  It uses the dma_alloc_coherent page allocator to get
+ * new pages, then splits them up into blocks of the required size.
+ * Many older drivers still have their own code to do this.
+ *
+ * The current design of this allocator is fairly simple.  The pool is
+ * represented by the 'struct dma_pool' which keeps a doubly-linked list of
+ * allocated pages.  Each page in the page_list is split into blocks of at
+ * least 'size' bytes.  Free blocks are tracked in an unsorted singly-linked
+ * list of free blocks within the page.  Used blocks aren't tracked, but we
+ * keep a count of how many are currently allocated from each page.
+ */
 
 #include <linux/device.h>
-#include <linux/mm.h>
-#include <asm/io.h>            /* Needed for i386 to build */
 #include <linux/dma-mapping.h>
 #include <linux/dmapool.h>
-#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/poison.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/wait.h>
 
-/*
- * Pool allocator ... wraps the dma_alloc_coherent page allocator, so
- * small blocks are easily used by drivers for bus mastering controllers.
- * This should probably be sharing the guts of the slab allocator.
- */
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
+#define DMAPOOL_DEBUG 1
+#endif
 
-struct dma_pool {      /* the pool */
-       struct list_head        page_list;
-       spinlock_t              lock;
-       size_t                  blocks_per_page;
-       size_t                  size;
-       struct device           *dev;
-       size_t                  allocation;
-       char                    name [32];
-       wait_queue_head_t       waitq;
-       struct list_head        pools;
+struct dma_pool {              /* the pool */
+       struct list_head page_list;     /* every dma_page owned by this pool */
+       spinlock_t lock;                /* held around page_list walks and block bookkeeping */
+       size_t size;                    /* block size after minimum-size and alignment rounding */
+       struct device *dev;             /* device doing the DMA; NULL is tolerated */
+       size_t allocation;              /* bytes requested per dma_alloc_coherent() call */
+       size_t boundary;                /* free list is laid out so no block crosses a multiple of this */
+       char name[32];                  /* copied from the creator with strlcpy() */
+       wait_queue_head_t waitq;        /* allocators sleep here when no block is available */
+       struct list_head pools;         /* link in dev->dma_pools */
 };
 
-struct dma_page {      /* cacheable header for 'allocation' bytes */
-       struct list_head        page_list;
-       void                    *vaddr;
-       dma_addr_t              dma;
-       unsigned                in_use;
-       unsigned long           bitmap [0];
+struct dma_page {              /* cacheable header for 'allocation' bytes */
+       struct list_head page_list;     /* link in the owning pool's page_list */
+       void *vaddr;                    /* CPU address returned by dma_alloc_coherent() */
+       dma_addr_t dma;                 /* DMA address of the same memory */
+       unsigned int in_use;            /* blocks currently handed out from this page */
+       unsigned int offset;            /* offset of first free block; >= allocation when none */
 };
 
 #define        POOL_TIMEOUT_JIFFIES    ((100 /* msec */ * HZ) / 1000)
 
-static DEFINE_MUTEX (pools_lock);
+static DEFINE_MUTEX(pools_lock);
 
 static ssize_t
-show_pools (struct device *dev, struct device_attribute *attr, char *buf)
+show_pools(struct device *dev, struct device_attribute *attr, char *buf)
 {
        unsigned temp;
        unsigned size;
@@ -60,16 +86,18 @@ show_pools (struct device *dev, struct device_attribute *attr, char *buf)
                unsigned pages = 0;
                unsigned blocks = 0;
 
+               spin_lock_irq(&pool->lock);
                list_for_each_entry(page, &pool->page_list, page_list) {
                        pages++;
                        blocks += page->in_use;
                }
+               spin_unlock_irq(&pool->lock);
 
                /* per-pool info, no real statistics yet */
                temp = scnprintf(next, size, "%-16s %4u %4Zu %4Zu %2u\n",
-                               pool->name,
-                               blocks, pages * pool->blocks_per_page,
-                               pool->size, pages);
+                                pool->name, blocks,
+                                pages * (pool->allocation / pool->size),
+                                pool->size, pages);
                size -= temp;
                next += temp;
        }
@@ -77,7 +105,8 @@ show_pools (struct device *dev, struct device_attribute *attr, char *buf)
 
        return PAGE_SIZE - size;
 }
-static DEVICE_ATTR (pools, S_IRUGO, show_pools, NULL);
+
+static DEVICE_ATTR(pools, S_IRUGO, show_pools, NULL);
 
 /**
  * dma_pool_create - Creates a pool of consistent memory blocks, for dma.
@@ -85,7 +114,7 @@ static DEVICE_ATTR (pools, S_IRUGO, show_pools, NULL);
  * @dev: device that will be doing the DMA
  * @size: size of the blocks in this pool.
  * @align: alignment requirement for blocks; must be a power of two
- * @allocation: returned blocks won't cross this boundary (or zero)
+ * @boundary: returned blocks won't cross this power of two boundary
  * Context: !in_interrupt()
  *
  * Returns a dma allocation pool with the requested characteristics, or
@@ -95,131 +124,135 @@ static DEVICE_ATTR (pools, S_IRUGO, show_pools, NULL);
  * cache flushing primitives.  The actual size of blocks allocated may be
  * larger than requested because of alignment.
  *
- * If allocation is nonzero, objects returned from dma_pool_alloc() won't
+ * If @boundary is nonzero, objects returned from dma_pool_alloc() won't
  * cross that size boundary.  This is useful for devices which have
  * addressing restrictions on individual DMA transfers, such as not crossing
  * boundaries of 4KBytes.
  */
-struct dma_pool *
-dma_pool_create (const char *name, struct device *dev,
-       size_t size, size_t align, size_t allocation)
+struct dma_pool *dma_pool_create(const char *name, struct device *dev,
+                                size_t size, size_t align, size_t boundary)
 {
-       struct dma_pool         *retval;
+       struct dma_pool *retval;
+       size_t allocation;      /* bytes obtained per backing page */
 
-       if (align == 0)
+       if (align == 0) {
                align = 1;
-       if (size == 0)
+       } else if (align & (align - 1)) {       /* reject non-power-of-two */
                return NULL;
-       else if (size < align)
-               size = align;
-       else if ((size % align) != 0) {
-               size += align + 1;
-               size &= ~(align - 1);
        }
 
-       if (allocation == 0) {
-               if (PAGE_SIZE < size)
-                       allocation = size;
-               else
-                       allocation = PAGE_SIZE;
-               // FIXME: round up for less fragmentation
-       } else if (allocation < size)
+       if (size == 0) {
                return NULL;
+       } else if (size < 4) {
+               size = 4;       /* a free block must hold the int link to the next one */
+       }
+
+       if ((size % align) != 0)
+               size = ALIGN(size, align);
+
+       allocation = max_t(size_t, size, PAGE_SIZE);
+
+       if (boundary && ((boundary < size) || (boundary & (boundary - 1))))
+               return NULL;
+       /* 0 means no limit; a larger boundary would let the tail block overrun */
+       if (!boundary || boundary > allocation)
+               boundary = allocation;
 
-       if (!(retval = kmalloc_node (sizeof *retval, GFP_KERNEL, dev_to_node(dev))))
+       retval = kmalloc_node(sizeof(*retval), GFP_KERNEL, dev_to_node(dev));
+       if (!retval)
                return retval;
 
-       strlcpy (retval->name, name, sizeof retval->name);
+       strlcpy(retval->name, name, sizeof(retval->name));
 
        retval->dev = dev;
 
-       INIT_LIST_HEAD (&retval->page_list);
-       spin_lock_init (&retval->lock);
+       INIT_LIST_HEAD(&retval->page_list);
+       spin_lock_init(&retval->lock);
        retval->size = size;
+       retval->boundary = boundary;
        retval->allocation = allocation;
-       retval->blocks_per_page = allocation / size;
-       init_waitqueue_head (&retval->waitq);
+       init_waitqueue_head(&retval->waitq);
 
        if (dev) {
                int ret;
 
                mutex_lock(&pools_lock);
-               if (list_empty (&dev->dma_pools))
-                       ret = device_create_file (dev, &dev_attr_pools);
+               if (list_empty(&dev->dma_pools))        /* first pool on this device */
+                       ret = device_create_file(dev, &dev_attr_pools);
                else
                        ret = 0;
                /* note:  not currently insisting "name" be unique */
                if (!ret)
-                       list_add (&retval->pools, &dev->dma_pools);
+                       list_add(&retval->pools, &dev->dma_pools);
                else {
                        kfree(retval);
                        retval = NULL;
                }
                mutex_unlock(&pools_lock);
        } else
-               INIT_LIST_HEAD (&retval->pools);
+               INIT_LIST_HEAD(&retval->pools); /* no device: pool stays off any list */
 
        return retval;
 }
+EXPORT_SYMBOL(dma_pool_create);
 
-
-static struct dma_page *
-pool_alloc_page (struct dma_pool *pool, gfp_t mem_flags)
+static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page)
 {
-       struct dma_page *page;
-       int             mapsize;
+       unsigned int offset = 0;        /* block currently being linked */
+       unsigned int next_boundary = pool->boundary;
+
+       do {    /* build the in-page free list: each block's first word holds the next free offset */
+               unsigned int next = offset + pool->size;
+               if (unlikely((next + pool->size) > next_boundary)) {    /* ending exactly on the boundary is fine */
+                       next = next_boundary;   /* the following block would straddle: skip to the boundary */
+                       next_boundary += pool->boundary;
+               }
+               *(int *)(page->vaddr + offset) = next;
+               offset = next;
+       } while (offset < pool->allocation);    /* the last link is >= allocation, i.e. "no more free blocks" */
+}
 
-       mapsize = pool->blocks_per_page;
-       mapsize = (mapsize + BITS_PER_LONG - 1) / BITS_PER_LONG;
-       mapsize *= sizeof (long);
+static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
+{      /* allocate one backing page plus its header and add it to @pool; NULL on failure */
+       struct dma_page *page;
 
-       page = kmalloc(mapsize + sizeof *page, mem_flags);
+       page = kmalloc(sizeof(*page), mem_flags);       /* header only; no per-page bitmap any more */
        if (!page)
                return NULL;
-       page->vaddr = dma_alloc_coherent (pool->dev,
-                                           pool->allocation,
-                                           &page->dma,
-                                           mem_flags);
+       page->vaddr = dma_alloc_coherent(pool->dev, pool->allocation,
+                                        &page->dma, mem_flags);
        if (page->vaddr) {
-               memset (page->bitmap, 0xff, mapsize);   // bit set == free
-#ifdef CONFIG_DEBUG_SLAB
-               memset (page->vaddr, POOL_POISON_FREED, pool->allocation);
+#ifdef DMAPOOL_DEBUG
+               memset(page->vaddr, POOL_POISON_FREED, pool->allocation);       /* poison first; the links are written next */
 #endif
-               list_add (&page->page_list, &pool->page_list);
+               pool_initialise_page(pool, page);       /* thread the free-block list through the page */
+               list_add(&page->page_list, &pool->page_list);
                page->in_use = 0;
+               page->offset = 0;       /* first free block is at the start of the page */
        } else {
-               kfree (page);
+               kfree(page);
                page = NULL;
        }
        return page;
 }
 
-
-static inline int
-is_page_busy (int blocks, unsigned long *bitmap)
+static inline int is_page_busy(struct dma_page *page)
 {
-       while (blocks > 0) {
-               if (*bitmap++ != ~0UL)
-                       return 1;
-               blocks -= BITS_PER_LONG;
-       }
-       return 0;
+       return page->in_use != 0;       /* nonzero while any block from this page is still allocated */
 }
 
-static void
-pool_free_page (struct dma_pool *pool, struct dma_page *page)
+static void pool_free_page(struct dma_pool *pool, struct dma_page *page)
 {
-       dma_addr_t      dma = page->dma;
+       dma_addr_t dma = page->dma;     /* DMA handle passed to dma_free_coherent() below */
 
-#ifdef CONFIG_DEBUG_SLAB
-       memset (page->vaddr, POOL_POISON_FREED, pool->allocation);
+#ifdef DMAPOOL_DEBUG
+       memset(page->vaddr, POOL_POISON_FREED, pool->allocation);       /* poison before handing the memory back */
 #endif
-       dma_free_coherent (pool->dev, pool->allocation, page->vaddr, dma);
-       list_del (&page->page_list);
-       kfree (page);
+       dma_free_coherent(pool->dev, pool->allocation, page->vaddr, dma);
+       list_del(&page->page_list);     /* unlink from pool->page_list */
+       kfree(page);                    /* release the header itself */
 }
 
-
 /**
  * dma_pool_destroy - destroys a pool of dma memory blocks.
  * @pool: dma pool that will be destroyed
@@ -228,36 +261,37 @@ pool_free_page (struct dma_pool *pool, struct dma_page *page)
  * Caller guarantees that no more memory from the pool is in use,
  * and that nothing will try to use the pool after this call.
  */
-void
-dma_pool_destroy (struct dma_pool *pool)
+void dma_pool_destroy(struct dma_pool *pool)
 {
        mutex_lock(&pools_lock);
-       list_del (&pool->pools);
-       if (pool->dev && list_empty (&pool->dev->dma_pools))
-               device_remove_file (pool->dev, &dev_attr_pools);
+       list_del(&pool->pools);
+       if (pool->dev && list_empty(&pool->dev->dma_pools))     /* that was the device's last pool */
+               device_remove_file(pool->dev, &dev_attr_pools);
        mutex_unlock(&pools_lock);
 
-       while (!list_empty (&pool->page_list)) {
-               struct dma_page         *page;
-               page = list_entry (pool->page_list.next,
-                               struct dma_page, page_list);
-               if (is_page_busy (pool->blocks_per_page, page->bitmap)) {
+       while (!list_empty(&pool->page_list)) { /* each pass unlinks the head page */
+               struct dma_page *page;
+               page = list_entry(pool->page_list.next,
+                                 struct dma_page, page_list);
+               if (is_page_busy(page)) {       /* blocks still allocated: report it */
                        if (pool->dev)
-                               dev_err(pool->dev, "dma_pool_destroy %s, %p busy\n",
+                               dev_err(pool->dev,
+                                       "dma_pool_destroy %s, %p busy\n",
                                        pool->name, page->vaddr);
                        else
-                               printk (KERN_ERR "dma_pool_destroy %s, %p busy\n",
-                                       pool->name, page->vaddr);
+                               printk(KERN_ERR
+                                      "dma_pool_destroy %s, %p busy\n",
+                                      pool->name, page->vaddr);
                        /* leak the still-in-use consistent memory */
-                       list_del (&page->page_list);
-                       kfree (page);
+                       list_del(&page->page_list);
+                       kfree(page);    /* only the header is freed on this path */
                } else
-                       pool_free_page (pool, page);
+                       pool_free_page(pool, page);     /* frees the coherent memory and the header */
        }
 
-       kfree (pool);
+       kfree(pool);
 }
-
+EXPORT_SYMBOL(dma_pool_destroy);
 
 /**
  * dma_pool_alloc - get a block of consistent memory
@@ -267,88 +301,71 @@ dma_pool_destroy (struct dma_pool *pool)
  *
  * This returns the kernel virtual address of a currently unused block,
  * and reports its dma address through the handle.
- * If such a memory block can't be allocated, null is returned.
+ * If such a memory block can't be allocated, %NULL is returned.
  */
-void *
-dma_pool_alloc (struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle)
+void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
+                    dma_addr_t *handle)
 {
-       unsigned long           flags;
-       struct dma_page         *page;
-       int                     map, block;
-       size_t                  offset;
-       void                    *retval;
-
-restart:
-       spin_lock_irqsave (&pool->lock, flags);
+       unsigned long flags;
+       struct dma_page *page;
+       size_t offset;          /* byte offset of the chosen block within its page */
+       void *retval;
+
+       might_sleep_if(mem_flags & __GFP_WAIT);
+
+       spin_lock_irqsave(&pool->lock, flags);
+ restart:      /* always entered with pool->lock held */
        list_for_each_entry(page, &pool->page_list, page_list) {
-               int             i;
-               /* only cachable accesses here ... */
-               for (map = 0, i = 0;
-                               i < pool->blocks_per_page;
-                               i += BITS_PER_LONG, map++) {
-                       if (page->bitmap [map] == 0)
-                               continue;
-                       block = ffz (~ page->bitmap [map]);
-                       if ((i + block) < pool->blocks_per_page) {
-                               clear_bit (block, &page->bitmap [map]);
-                               offset = (BITS_PER_LONG * map) + block;
-                               offset *= pool->size;
-                               goto ready;
-                       }
-               }
+               if (page->offset < pool->allocation)    /* this page still has a free block */
+                       goto ready;
        }
-       if (!(page = pool_alloc_page (pool, GFP_ATOMIC))) {
+       page = pool_alloc_page(pool, GFP_ATOMIC);       /* GFP_ATOMIC regardless of mem_flags: the spinlock is held */
+       if (!page) {
                if (mem_flags & __GFP_WAIT) {
-                       DECLARE_WAITQUEUE (wait, current);
+                       DECLARE_WAITQUEUE(wait, current);
 
-                       __set_current_state(TASK_INTERRUPTIBLE);
-                       add_wait_queue (&pool->waitq, &wait);
-                       spin_unlock_irqrestore (&pool->lock, flags);
+                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       __add_wait_queue(&pool->waitq, &wait);  /* queued while pool->lock is still held */
+                       spin_unlock_irqrestore(&pool->lock, flags);
 
-                       schedule_timeout (POOL_TIMEOUT_JIFFIES);
+                       schedule_timeout(POOL_TIMEOUT_JIFFIES); /* sleep until woken by a free or the timeout */
 
-                       remove_wait_queue (&pool->waitq, &wait);
+                       spin_lock_irqsave(&pool->lock, flags);  /* retake the lock before dequeuing and retrying */
+                       __remove_wait_queue(&pool->waitq, &wait);
                        goto restart;
                }
                retval = NULL;
                goto done;
        }
 
-       clear_bit (0, &page->bitmap [0]);
-       offset = 0;
-ready:
+ ready:
        page->in_use++;
+       offset = page->offset;  /* take the block at the head of the page's free list */
+       page->offset = *(int *)(page->vaddr + offset);  /* new head is the link stored in that block */
        retval = offset + page->vaddr;
        *handle = offset + page->dma;
-#ifdef CONFIG_DEBUG_SLAB
-       memset (retval, POOL_POISON_ALLOCATED, pool->size);
+#ifdef DMAPOOL_DEBUG
+       memset(retval, POOL_POISON_ALLOCATED, pool->size);
 #endif
-done:
-       spin_unlock_irqrestore (&pool->lock, flags);
+ done:
+       spin_unlock_irqrestore(&pool->lock, flags);
        return retval;
 }
+EXPORT_SYMBOL(dma_pool_alloc);
 
-
-static struct dma_page *
-pool_find_page (struct dma_pool *pool, dma_addr_t dma)
+static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
 {
-       unsigned long           flags;
-       struct dma_page         *page;
+       struct dma_page *page;  /* no locking here: dma_pool_free() calls this with pool->lock held */
 
-       spin_lock_irqsave (&pool->lock, flags);
        list_for_each_entry(page, &pool->page_list, page_list) {
                if (dma < page->dma)
                        continue;
                if (dma < (page->dma + pool->allocation))
-                       goto done;
+                       return page;    /* @dma lies inside this page's DMA range */
        }
-       page = NULL;
-done:
-       spin_unlock_irqrestore (&pool->lock, flags);
-       return page;
+       return NULL;    /* @dma belongs to no page of this pool */
 }
 
-
 /**
  * dma_pool_free - put block back into dma pool
  * @pool: the dma pool holding the block
@@ -358,62 +375,75 @@ done:
  * Caller promises neither device nor driver will again touch this block
  * unless it is first re-allocated.
  */
-void
-dma_pool_free (struct dma_pool *pool, void *vaddr, dma_addr_t dma)
+void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
 {
-       struct dma_page         *page;
-       unsigned long           flags;
-       int                     map, block;
+       struct dma_page *page;
+       unsigned long flags;
+       unsigned int offset;    /* byte offset of the freed block within its page */
 
-       if ((page = pool_find_page(pool, dma)) == NULL) {
+       spin_lock_irqsave(&pool->lock, flags);  /* held across lookup and list update */
+       page = pool_find_page(pool, dma);
+       if (!page) {
+               spin_unlock_irqrestore(&pool->lock, flags);     /* drop the lock before logging */
                if (pool->dev)
-                       dev_err(pool->dev, "dma_pool_free %s, %p/%lx (bad dma)\n",
-                               pool->name, vaddr, (unsigned long) dma);
+                       dev_err(pool->dev,
+                               "dma_pool_free %s, %p/%lx (bad dma)\n",
+                               pool->name, vaddr, (unsigned long)dma);
                else
-                       printk (KERN_ERR "dma_pool_free %s, %p/%lx (bad dma)\n",
-                               pool->name, vaddr, (unsigned long) dma);
+                       printk(KERN_ERR "dma_pool_free %s, %p/%lx (bad dma)\n",
+                              pool->name, vaddr, (unsigned long)dma);
                return;
        }
 
-       block = dma - page->dma;
-       block /= pool->size;
-       map = block / BITS_PER_LONG;
-       block %= BITS_PER_LONG;
-
-#ifdef CONFIG_DEBUG_SLAB
-       if (((dma - page->dma) + (void *)page->vaddr) != vaddr) {
+       offset = vaddr - page->vaddr;
+#ifdef DMAPOOL_DEBUG
+       if ((dma - page->dma) != offset) {      /* CPU and DMA addresses must name the same block */
+               spin_unlock_irqrestore(&pool->lock, flags);
                if (pool->dev)
-                       dev_err(pool->dev, "dma_pool_free %s, %p (bad vaddr)/%Lx\n",
-                               pool->name, vaddr, (unsigned long long) dma);
+                       dev_err(pool->dev,
+                               "dma_pool_free %s, %p (bad vaddr)/%Lx\n",
+                               pool->name, vaddr, (unsigned long long)dma);
                else
-                       printk (KERN_ERR "dma_pool_free %s, %p (bad vaddr)/%Lx\n",
-                               pool->name, vaddr, (unsigned long long) dma);
+                       printk(KERN_ERR
+                              "dma_pool_free %s, %p (bad vaddr)/%Lx\n",
+                              pool->name, vaddr, (unsigned long long)dma);
                return;
        }
-       if (page->bitmap [map] & (1UL << block)) {
-               if (pool->dev)
-                       dev_err(pool->dev, "dma_pool_free %s, dma %Lx already free\n",
-                               pool->name, (unsigned long long)dma);
-               else
-                       printk (KERN_ERR "dma_pool_free %s, dma %Lx already free\n",
-                               pool->name, (unsigned long long)dma);
-               return;
+       {
+               unsigned int chain = page->offset;      /* walk the free list looking for this block */
+               while (chain < pool->allocation) {
+                       if (chain != offset) {
+                               chain = *(int *)(page->vaddr + chain);
+                               continue;
+                       }
+                       spin_unlock_irqrestore(&pool->lock, flags);     /* found on the free list: double free */
+                       if (pool->dev)
+                               dev_err(pool->dev, "dma_pool_free %s, dma %Lx "
+                                       "already free\n", pool->name,
+                                       (unsigned long long)dma);
+                       else
+                               printk(KERN_ERR "dma_pool_free %s, dma %Lx "
+                                       "already free\n", pool->name,
+                                       (unsigned long long)dma);
+                       return;
+               }
        }
-       memset (vaddr, POOL_POISON_FREED, pool->size);
+       memset(vaddr, POOL_POISON_FREED, pool->size);   /* the link word is rewritten just below */
 #endif
 
-       spin_lock_irqsave (&pool->lock, flags);
        page->in_use--;
-       set_bit (block, &page->bitmap [map]);
-       if (waitqueue_active (&pool->waitq))
-               wake_up (&pool->waitq);
+       *(int *)vaddr = page->offset;   /* freed block points at the old list head ... */
+       page->offset = offset;          /* ... and becomes the new head */
+       if (waitqueue_active(&pool->waitq))
+               wake_up_locked(&pool->waitq);   /* waiters were queued under pool->lock, which is held here */
        /*
         * Resist a temptation to do
-        *    if (!is_page_busy(bpp, page->bitmap)) pool_free_page(pool, page);
+        *    if (!is_page_busy(page)) pool_free_page(pool, page);
         * Better have a few empty pages hang around.
         */
-       spin_unlock_irqrestore (&pool->lock, flags);
+       spin_unlock_irqrestore(&pool->lock, flags);
 }
+EXPORT_SYMBOL(dma_pool_free);
 
 /*
  * Managed DMA pool
@@ -458,6 +488,7 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
 
        return pool;
 }
+EXPORT_SYMBOL(dmam_pool_create);
 
 /**
  * dmam_pool_destroy - Managed dma_pool_destroy()
@@ -472,10 +503,4 @@ void dmam_pool_destroy(struct dma_pool *pool)
        dma_pool_destroy(pool);
        WARN_ON(devres_destroy(dev, dmam_pool_release, dmam_pool_match, pool));
 }
-
-EXPORT_SYMBOL (dma_pool_create);
-EXPORT_SYMBOL (dma_pool_destroy);
-EXPORT_SYMBOL (dma_pool_alloc);
-EXPORT_SYMBOL (dma_pool_free);
-EXPORT_SYMBOL (dmam_pool_create);
-EXPORT_SYMBOL (dmam_pool_destroy);
+EXPORT_SYMBOL(dmam_pool_destroy);