diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index b3b065c..39485c3 100644
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
-#include <linux/dmapool.h>
 #include <linux/platform_device.h>
-#include <cpu/dma.h>
-#include <asm/dma-sh.h>
+#include <linux/pm_runtime.h>
+#include <linux/sh_dma.h>
+#include <linux/notifier.h>
+#include <linux/kdebug.h>
+#include <linux/spinlock.h>
+#include <linux/rculist.h>
+
+#include "dmaengine.h"
 #include "shdma.h"
 
 /* DMA descriptor control */
-#define DESC_LAST      (-1)
-#define DESC_COMP      (1)
-#define DESC_NCOMP     (0)
+enum sh_dmae_desc_status {
+       DESC_IDLE,
+       DESC_PREPARED,
+       DESC_SUBMITTED,
+       DESC_COMPLETED, /* completed, have to call callback */
+       DESC_WAITING,   /* callback called, waiting for ack / re-submit */
+};
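/*
 * [Editor's sketch, not part of the patch] The life cycle implied by the
 * states above, as the rest of this diff uses them:
 *
 *   DESC_IDLE      --prep_sg/add_desc-->  DESC_PREPARED   (off ld_free)
 *   DESC_PREPARED  --tx_submit--------->  DESC_SUBMITTED  (onto ld_queue)
 *   DESC_SUBMITTED --tasklet----------->  DESC_COMPLETED  (TE matched)
 *   DESC_COMPLETED --__ld_cleanup------>  DESC_WAITING    (callback ran)
 *   DESC_WAITING   --ack--------------->  DESC_IDLE       (back to ld_free)
 */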
 
 #define NR_DESCS_PER_CHANNEL 32
+/* Default MEMCPY transfer size = 2^2 = 4 bytes */
+#define LOG2_DEFAULT_XFER_SIZE 2
+
 /*
- * Define the default configuration for dual address memory-memory transfer.
- * The 0x400 value represents auto-request, external->external.
- *
- * And this driver set 4byte burst mode.
- * If you want to change mode, you need to change RS_DEFAULT of value.
- * (ex 1byte burst mode -> (RS_DUAL & ~TS_32)
+ * Used for write-side mutual exclusion for the global device list,
+ * read-side synchronization by way of RCU, and per-controller data.
  */
-#define RS_DEFAULT  (RS_DUAL)
+static DEFINE_SPINLOCK(sh_dmae_lock);
+static LIST_HEAD(sh_dmae_devices);
+
+/* A bitmask with bits enough for enum sh_dmae_slave_chan_id */
+static unsigned long sh_dmae_slave_used[BITS_TO_LONGS(SH_DMA_SLAVE_NUMBER)];
+
+static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all);
 
-#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id])
 static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
 {
-       ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+       __raw_writel(data, sh_dc->base + reg / sizeof(u32));
 }
 
 static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
 {
-       return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+       return __raw_readl(sh_dc->base + reg / sizeof(u32));
 }
 
-static void dmae_init(struct sh_dmae_chan *sh_chan)
+static u16 dmaor_read(struct sh_dmae_device *shdev)
+{
+       u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32);
+
+       if (shdev->pdata->dmaor_is_32bit)
+               return __raw_readl(addr);
+       else
+               return __raw_readw(addr);
+}
+
+static void dmaor_write(struct sh_dmae_device *shdev, u16 data)
+{
+       u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32);
+
+       if (shdev->pdata->dmaor_is_32bit)
+               __raw_writel(data, addr);
+       else
+               __raw_writew(data, addr);
+}
+
+static void chcr_write(struct sh_dmae_chan *sh_dc, u32 data)
 {
-       u32 chcr = RS_DEFAULT; /* default is DUAL mode */
-       sh_dmae_writel(sh_chan, chcr, CHCR);
+       struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+       __raw_writel(data, sh_dc->base + shdev->chcr_offset / sizeof(u32));
+}
+
+static u32 chcr_read(struct sh_dmae_chan *sh_dc)
+{
+       struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+       return __raw_readl(sh_dc->base + shdev->chcr_offset / sizeof(u32));
 }
 
 /*
@@ -67,180 +109,266 @@ static void dmae_init(struct sh_dmae_chan *sh_chan)
  *
  * SH7780 has two DMAOR register
  */
-static void sh_dmae_ctl_stop(int id)
+static void sh_dmae_ctl_stop(struct sh_dmae_device *shdev)
 {
-       unsigned short dmaor = dmaor_read_reg(id);
+       unsigned short dmaor;
+       unsigned long flags;
+
+       spin_lock_irqsave(&sh_dmae_lock, flags);
+
+       dmaor = dmaor_read(shdev);
+       dmaor_write(shdev, dmaor & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME));
 
-       dmaor &= ~(DMAOR_NMIF | DMAOR_AE);
-       dmaor_write_reg(id, dmaor);
+       spin_unlock_irqrestore(&sh_dmae_lock, flags);
 }
 
-static int sh_dmae_rst(int id)
+static int sh_dmae_rst(struct sh_dmae_device *shdev)
 {
        unsigned short dmaor;
+       unsigned long flags;
 
-       sh_dmae_ctl_stop(id);
-       dmaor = (dmaor_read_reg(id)|DMAOR_INIT);
+       spin_lock_irqsave(&sh_dmae_lock, flags);
 
-       dmaor_write_reg(id, dmaor);
-       if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) {
-               pr_warning(KERN_ERR "dma-sh: Can't initialize DMAOR.\n");
-               return -EINVAL;
+       dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME);
+
+       dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
+
+       dmaor = dmaor_read(shdev);
+
+       spin_unlock_irqrestore(&sh_dmae_lock, flags);
+
+       if (dmaor & (DMAOR_AE | DMAOR_NMIF)) {
+               dev_warn(shdev->common.dev, "Can't initialize DMAOR.\n");
+               return -EIO;
        }
        return 0;
 }
 
-static int dmae_is_idle(struct sh_dmae_chan *sh_chan)
+static bool dmae_is_busy(struct sh_dmae_chan *sh_chan)
 {
-       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
-       if (chcr & CHCR_DE) {
-               if (!(chcr & CHCR_TE))
-                       return -EBUSY; /* working */
-       }
-       return 0; /* waiting */
+       u32 chcr = chcr_read(sh_chan);
+
+       if ((chcr & (CHCR_DE | CHCR_TE)) == CHCR_DE)
+               return true; /* working */
+
+       return false; /* waiting */
 }
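/*
 * [Editor's note, illustrative only] The masked compare above reads as
 * "enabled but transfer-end not yet raised". For example:
 *
 *   chcr = CHCR_DE | CHCR_IE            -> busy (transfer in flight)
 *   chcr = CHCR_DE | CHCR_TE | CHCR_IE  -> idle (TE set, channel done)
 *   chcr = 0                            -> idle (channel halted)
 */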
 
-static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan)
+static unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan, u32 chcr)
 {
-       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
-       return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT];
+       struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+       struct sh_dmae_pdata *pdata = shdev->pdata;
+       int cnt = ((chcr & pdata->ts_low_mask) >> pdata->ts_low_shift) |
+               ((chcr & pdata->ts_high_mask) >> pdata->ts_high_shift);
+
+       if (cnt >= pdata->ts_shift_num)
+               cnt = 0;
+
+       return pdata->ts_shift[cnt];
 }
 
-static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw)
+static u32 log2size_to_chcr(struct sh_dmae_chan *sh_chan, int l2size)
 {
-       sh_dmae_writel(sh_chan, hw.sar, SAR);
-       sh_dmae_writel(sh_chan, hw.dar, DAR);
-       sh_dmae_writel(sh_chan,
-               (hw.tcr >> calc_xmit_shift(sh_chan)), TCR);
+       struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+       struct sh_dmae_pdata *pdata = shdev->pdata;
+       int i;
+
+       for (i = 0; i < pdata->ts_shift_num; i++)
+               if (pdata->ts_shift[i] == l2size)
+                       break;
+
+       if (i == pdata->ts_shift_num)
+               i = 0;
+
+       return ((i << pdata->ts_low_shift) & pdata->ts_low_mask) |
+               ((i << pdata->ts_high_shift) & pdata->ts_high_mask);
+}
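/*
 * [Editor's sketch, hypothetical helper] How the two functions above
 * cooperate: log2size_to_chcr() encodes a log2 transfer size into the
 * split TS field, calc_xmit_shift() decodes it back, and dmae_set_reg()
 * below uses the decoded shift to program TCR in transfer units rather
 * than bytes:
 */
static inline u32 example_tcr_units(size_t bytes, unsigned int xmit_shift)
{
	/* e.g. a 4096-byte copy with a 32-byte unit (xmit_shift == 5)
	 * programs TCR = 4096 >> 5 = 128 units */
	return bytes >> xmit_shift;
}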
+
+static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw)
+{
+       sh_dmae_writel(sh_chan, hw->sar, SAR);
+       sh_dmae_writel(sh_chan, hw->dar, DAR);
+       sh_dmae_writel(sh_chan, hw->tcr >> sh_chan->xmit_shift, TCR);
 }
 
 static void dmae_start(struct sh_dmae_chan *sh_chan)
 {
-       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+       struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+       u32 chcr = chcr_read(sh_chan);
+
+       if (shdev->pdata->needs_tend_set)
+               sh_dmae_writel(sh_chan, 0xFFFFFFFF, TEND);
 
-       chcr |= (CHCR_DE|CHCR_IE);
-       sh_dmae_writel(sh_chan, chcr, CHCR);
+       chcr |= CHCR_DE | shdev->chcr_ie_bit;
+       chcr_write(sh_chan, chcr & ~CHCR_TE);
 }
 
 static void dmae_halt(struct sh_dmae_chan *sh_chan)
 {
-       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+       struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+       u32 chcr = chcr_read(sh_chan);
+
+       chcr &= ~(CHCR_DE | CHCR_TE | shdev->chcr_ie_bit);
+       chcr_write(sh_chan, chcr);
+}
 
-       chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE);
-       sh_dmae_writel(sh_chan, chcr, CHCR);
+static void dmae_init(struct sh_dmae_chan *sh_chan)
+{
+       /*
+        * Default configuration for dual address memory-memory transfer.
+        * 0x400 represents auto-request.
+        */
+       u32 chcr = DM_INC | SM_INC | 0x400 | log2size_to_chcr(sh_chan,
+                                                  LOG2_DEFAULT_XFER_SIZE);
+       sh_chan->xmit_shift = calc_xmit_shift(sh_chan, chcr);
+       chcr_write(sh_chan, chcr);
 }
 
 static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
 {
-       int ret = dmae_is_idle(sh_chan);
-       /* When DMA was working, can not set data to CHCR */
-       if (ret)
-               return ret;
+       /* If DMA is active, cannot set CHCR. TODO: remove this superfluous check */
+       if (dmae_is_busy(sh_chan))
+               return -EBUSY;
+
+       sh_chan->xmit_shift = calc_xmit_shift(sh_chan, val);
+       chcr_write(sh_chan, val);
 
-       sh_dmae_writel(sh_chan, val, CHCR);
        return 0;
 }
 
-#define DMARS1_ADDR    0x04
-#define DMARS2_ADDR    0x08
-#define DMARS_SHIFT 8
-#define DMARS_CHAN_MSK 0x01
 static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
 {
-       u32 addr;
-       int shift = 0;
-       int ret = dmae_is_idle(sh_chan);
-       if (ret)
-               return ret;
-
-       if (sh_chan->id & DMARS_CHAN_MSK)
-               shift = DMARS_SHIFT;
-
-       switch (sh_chan->id) {
-       /* DMARS0 */
-       case 0:
-       case 1:
-               addr = SH_DMARS_BASE;
-               break;
-       /* DMARS1 */
-       case 2:
-       case 3:
-               addr = (SH_DMARS_BASE + DMARS1_ADDR);
-               break;
-       /* DMARS2 */
-       case 4:
-       case 5:
-               addr = (SH_DMARS_BASE + DMARS2_ADDR);
-               break;
-       default:
-               return -EINVAL;
-       }
+       struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+       struct sh_dmae_pdata *pdata = shdev->pdata;
+       const struct sh_dmae_channel *chan_pdata = &pdata->channel[sh_chan->id];
+       u16 __iomem *addr = shdev->dmars;
+       unsigned int shift = chan_pdata->dmars_bit;
+
+       if (dmae_is_busy(sh_chan))
+               return -EBUSY;
 
-       ctrl_outw((val << shift) |
-               (ctrl_inw(addr) & (shift ? 0xFF00 : 0x00FF)),
-               addr);
+       if (pdata->no_dmars)
+               return 0;
+
+       /* in the case of a missing DMARS resource use first memory window */
+       if (!addr)
+               addr = (u16 __iomem *)shdev->chan_reg;
+       addr += chan_pdata->dmars / sizeof(u16);
+
+       __raw_writew((__raw_readw(addr) & (0xff00 >> shift)) | (val << shift),
+                    addr);
 
        return 0;
 }
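/*
 * [Editor's worked example, hypothetical values] Each 16-bit DMARS word
 * serves two channels, so dmars_bit is 0 for one partner and 8 for the
 * other. With dmars_bit == 8 and mid_rid == 0x22, the read-modify-write
 * above computes
 *
 *   new = (old & (0xff00 >> 8)) | (0x22 << 8)
 *       = (old & 0x00ff)        |  0x2200
 *
 * leaving the partner channel's byte untouched.
 */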
 
 static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
 {
-       struct sh_desc *desc = tx_to_sh_desc(tx);
+       struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c;
        struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan);
+       dma_async_tx_callback callback = tx->callback;
        dma_cookie_t cookie;
 
        spin_lock_bh(&sh_chan->desc_lock);
 
-       cookie = sh_chan->common.cookie;
-       cookie++;
-       if (cookie < 0)
-               cookie = 1;
+       cookie = dma_cookie_assign(tx);
+
+       /* Mark all chunks of this descriptor as submitted, move to the queue */
+       list_for_each_entry_safe(chunk, c, desc->node.prev, node) {
+               /*
+                * All chunks are on the global ld_free, so we have to find
+                * the end of the chain ourselves
+                */
+               if (chunk != desc && (chunk->mark == DESC_IDLE ||
+                                     chunk->async_tx.cookie > 0 ||
+                                     chunk->async_tx.cookie == -EBUSY ||
+                                     &chunk->node == &sh_chan->ld_free))
+                       break;
+               chunk->mark = DESC_SUBMITTED;
+               /* Callback goes to the last chunk */
+               chunk->async_tx.callback = NULL;
+               chunk->cookie = cookie;
+               list_move_tail(&chunk->node, &sh_chan->ld_queue);
+               last = chunk;
+       }
 
-       /* If desc only in the case of 1 */
-       if (desc->async_tx.cookie != -EBUSY)
-               desc->async_tx.cookie = cookie;
-       sh_chan->common.cookie = desc->async_tx.cookie;
+       last->async_tx.callback = callback;
+       last->async_tx.callback_param = tx->callback_param;
 
-       list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev);
+       dev_dbg(sh_chan->dev, "submit #%d@%p on %d: %x[%d] -> %x\n",
+               tx->cookie, &last->async_tx, sh_chan->id,
+               desc->hw.sar, desc->hw.tcr, desc->hw.dar);
 
        spin_unlock_bh(&sh_chan->desc_lock);
 
        return cookie;
 }
 
+/* Called with desc_lock held */
 static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
 {
-       struct sh_desc *desc, *_desc, *ret = NULL;
+       struct sh_desc *desc;
 
-       spin_lock_bh(&sh_chan->desc_lock);
-       list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) {
-               if (async_tx_test_ack(&desc->async_tx)) {
+       list_for_each_entry(desc, &sh_chan->ld_free, node)
+               if (desc->mark != DESC_PREPARED) {
+                       BUG_ON(desc->mark != DESC_IDLE);
                        list_del(&desc->node);
-                       ret = desc;
-                       break;
+                       return desc;
                }
-       }
-       spin_unlock_bh(&sh_chan->desc_lock);
 
-       return ret;
+       return NULL;
 }
 
-static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc)
+static const struct sh_dmae_slave_config *sh_dmae_find_slave(
+       struct sh_dmae_chan *sh_chan, struct sh_dmae_slave *param)
 {
-       if (desc) {
-               spin_lock_bh(&sh_chan->desc_lock);
+       struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+       struct sh_dmae_pdata *pdata = shdev->pdata;
+       int i;
 
-               list_splice_init(&desc->tx_list, &sh_chan->ld_free);
-               list_add(&desc->node, &sh_chan->ld_free);
+       if (param->slave_id >= SH_DMA_SLAVE_NUMBER)
+               return NULL;
 
-               spin_unlock_bh(&sh_chan->desc_lock);
-       }
+       for (i = 0; i < pdata->slave_num; i++)
+               if (pdata->slave[i].slave_id == param->slave_id)
+                       return pdata->slave + i;
+
+       return NULL;
 }
 
 static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
 {
        struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
        struct sh_desc *desc;
+       struct sh_dmae_slave *param = chan->private;
+       int ret;
+
+       pm_runtime_get_sync(sh_chan->dev);
+
+       /*
+        * This relies on the guarantee from dmaengine that alloc_chan_resources
+        * never runs concurrently with itself or free_chan_resources.
+        */
+       if (param) {
+               const struct sh_dmae_slave_config *cfg;
+
+               cfg = sh_dmae_find_slave(sh_chan, param);
+               if (!cfg) {
+                       ret = -EINVAL;
+                       goto efindslave;
+               }
+
+               if (test_and_set_bit(param->slave_id, sh_dmae_slave_used)) {
+                       ret = -EBUSY;
+                       goto etestused;
+               }
+
+               param->config = cfg;
+
+               dmae_set_dmars(sh_chan, cfg->mid_rid);
+               dmae_set_chcr(sh_chan, cfg->chcr);
+       } else {
+               dmae_init(sh_chan);
+       }
 
        spin_lock_bh(&sh_chan->desc_lock);
        while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) {
@@ -253,16 +381,28 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
                dma_async_tx_descriptor_init(&desc->async_tx,
                                        &sh_chan->common);
                desc->async_tx.tx_submit = sh_dmae_tx_submit;
-               desc->async_tx.flags = DMA_CTRL_ACK;
-               INIT_LIST_HEAD(&desc->tx_list);
-               sh_dmae_put_desc(sh_chan, desc);
+               desc->mark = DESC_IDLE;
 
                spin_lock_bh(&sh_chan->desc_lock);
+               list_add(&desc->node, &sh_chan->ld_free);
                sh_chan->descs_allocated++;
        }
        spin_unlock_bh(&sh_chan->desc_lock);
 
+       if (!sh_chan->descs_allocated) {
+               ret = -ENOMEM;
+               goto edescalloc;
+       }
+
        return sh_chan->descs_allocated;
+
+edescalloc:
+       if (param)
+               clear_bit(param->slave_id, sh_dmae_slave_used);
+etestused:
+efindslave:
+       pm_runtime_put(sh_chan->dev);
+       return ret;
 }
 
 /*
@@ -273,8 +413,26 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan)
        struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
        struct sh_desc *desc, *_desc;
        LIST_HEAD(list);
+       int descs = sh_chan->descs_allocated;
+
+       /* Protect against ISR */
+       spin_lock_irq(&sh_chan->desc_lock);
+       dmae_halt(sh_chan);
+       spin_unlock_irq(&sh_chan->desc_lock);
+
+       /* Now no new interrupts will occur */
+
+       /* Prepared and not submitted descriptors can still be on the queue */
+       if (!list_empty(&sh_chan->ld_queue))
+               sh_dmae_chan_ld_cleanup(sh_chan, true);
+
+       if (chan->private) {
+               /* The caller is holding dma_list_mutex */
+               struct sh_dmae_slave *param = chan->private;
+               clear_bit(param->slave_id, sh_dmae_slave_used);
+               chan->private = NULL;
+       }
 
-       BUG_ON(!list_empty(&sh_chan->ld_queue));
        spin_lock_bh(&sh_chan->desc_lock);
 
        list_splice_init(&sh_chan->ld_free, &list);
@@ -282,128 +440,378 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan)
 
        spin_unlock_bh(&sh_chan->desc_lock);
 
+       if (descs > 0)
+               pm_runtime_put(sh_chan->dev);
+
        list_for_each_entry_safe(desc, _desc, &list, node)
                kfree(desc);
 }
 
+/**
+ * sh_dmae_add_desc - get, set up and return one transfer descriptor
+ * @sh_chan:   DMA channel
+ * @flags:     DMA transfer flags
+ * @dest:      destination DMA address, incremented when direction equals
+ *             DMA_FROM_DEVICE or DMA_BIDIRECTIONAL
+ * @src:       source DMA address, incremented when direction equals
+ *             DMA_TO_DEVICE or DMA_BIDIRECTIONAL
+ * @len:       DMA transfer length
+ * @first:     if NULL, set to the current descriptor and cookie set to -EBUSY
+ * @direction: needed for slave DMA to decide which address to keep constant,
+ *             equals DMA_BIDIRECTIONAL for MEMCPY
+ * Returns a pointer to the new descriptor, or NULL on failure
+ * Locks: called with desc_lock held
+ */
+static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
+       unsigned long flags, dma_addr_t *dest, dma_addr_t *src, size_t *len,
+       struct sh_desc **first, enum dma_data_direction direction)
+{
+       struct sh_desc *new;
+       size_t copy_size;
+
+       if (!*len)
+               return NULL;
+
+       /* Allocate the link descriptor from the free list */
+       new = sh_dmae_get_desc(sh_chan);
+       if (!new) {
+               dev_err(sh_chan->dev, "No free link descriptor available\n");
+               return NULL;
+       }
+
+       copy_size = min(*len, (size_t)SH_DMA_TCR_MAX + 1);
+
+       new->hw.sar = *src;
+       new->hw.dar = *dest;
+       new->hw.tcr = copy_size;
+
+       if (!*first) {
+               /* First desc */
+               new->async_tx.cookie = -EBUSY;
+               *first = new;
+       } else {
+               /* Other desc - invisible to the user */
+               new->async_tx.cookie = -EINVAL;
+       }
+
+       dev_dbg(sh_chan->dev,
+               "chaining (%u/%u)@%x -> %x with %p, cookie %d, shift %d\n",
+               copy_size, *len, *src, *dest, &new->async_tx,
+               new->async_tx.cookie, sh_chan->xmit_shift);
+
+       new->mark = DESC_PREPARED;
+       new->async_tx.flags = flags;
+       new->direction = direction;
+
+       *len -= copy_size;
+       if (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE)
+               *src += copy_size;
+       if (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE)
+               *dest += copy_size;
+
+       return new;
+}
+
+/*
+ * sh_dmae_prep_sg - prepare transfer descriptors from an SG list
+ *
+ * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also
+ * converted to scatter-gather to guarantee consistent locking and correct
+ * list manipulation. For slave DMA, direction carries the usual meaning, and,
+ * logically, the SG list is RAM and the addr variable contains the slave
+ * address, e.g., the FIFO I/O register. For MEMCPY, direction equals
+ * DMA_BIDIRECTIONAL and the SG list contains a single element pointing at the
+ * source buffer.
+ */
+static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_chan,
+       struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
+       enum dma_data_direction direction, unsigned long flags)
+{
+       struct scatterlist *sg;
+       struct sh_desc *first = NULL, *new = NULL /* compiler... */;
+       LIST_HEAD(tx_list);
+       int chunks = 0;
+       int i;
+
+       if (!sg_len)
+               return NULL;
+
+       for_each_sg(sgl, sg, sg_len, i)
+               chunks += (sg_dma_len(sg) + SH_DMA_TCR_MAX) /
+                       (SH_DMA_TCR_MAX + 1);
+
+       /* Have to lock the whole loop to protect against concurrent release */
+       spin_lock_bh(&sh_chan->desc_lock);
+
+       /*
+        * Chaining:
+        * first descriptor is what user is dealing with in all API calls, its
+        *      cookie is at first set to -EBUSY, at tx-submit to a positive
+        *      number
+        * if more than one chunk is needed further chunks have cookie = -EINVAL
+        * the last chunk, if not equal to the first, has cookie = -ENOSPC
+        * all chunks are linked onto the tx_list head with their .node heads
+        *      only during this function, then they are immediately spliced
+        *      back onto the free list in form of a chain
+        */
+       for_each_sg(sgl, sg, sg_len, i) {
+               dma_addr_t sg_addr = sg_dma_address(sg);
+               size_t len = sg_dma_len(sg);
+
+               if (!len)
+                       goto err_get_desc;
+
+               do {
+                       dev_dbg(sh_chan->dev, "Add SG #%d@%p[%d], dma %llx\n",
+                               i, sg, len, (unsigned long long)sg_addr);
+
+                       if (direction == DMA_FROM_DEVICE)
+                               new = sh_dmae_add_desc(sh_chan, flags,
+                                               &sg_addr, addr, &len, &first,
+                                               direction);
+                       else
+                               new = sh_dmae_add_desc(sh_chan, flags,
+                                               addr, &sg_addr, &len, &first,
+                                               direction);
+                       if (!new)
+                               goto err_get_desc;
+
+                       new->chunks = chunks--;
+                       list_add_tail(&new->node, &tx_list);
+               } while (len);
+       }
+
+       if (new != first)
+               new->async_tx.cookie = -ENOSPC;
+
+       /* Put them back on the free list, so they don't get lost */
+       list_splice_tail(&tx_list, &sh_chan->ld_free);
+
+       spin_unlock_bh(&sh_chan->desc_lock);
+
+       return &first->async_tx;
+
+err_get_desc:
+       list_for_each_entry(new, &tx_list, node)
+               new->mark = DESC_IDLE;
+       list_splice(&tx_list, &sh_chan->ld_free);
+
+       spin_unlock_bh(&sh_chan->desc_lock);
+
+       return NULL;
+}
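/*
 * [Editor's arithmetic check, assuming the usual SH_DMA_TCR_MAX of
 * 0x00FFFFFF from <linux/sh_dma.h>] The chunk count above rounds up:
 * a 40 MiB SG entry gives ((40 << 20) + 0xFFFFFF) / 0x1000000 = 3 chunks,
 * and sh_dmae_add_desc() clips each to at most SH_DMA_TCR_MAX + 1 bytes
 * (16 MiB + 16 MiB + 8 MiB).
 */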
+
 static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
        struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
        size_t len, unsigned long flags)
 {
        struct sh_dmae_chan *sh_chan;
-       struct sh_desc *first = NULL, *prev = NULL, *new;
-       size_t copy_size;
+       struct scatterlist sg;
 
-       if (!chan)
+       if (!chan || !len)
                return NULL;
 
-       if (!len)
+       sh_chan = to_sh_chan(chan);
+
+       sg_init_table(&sg, 1);
+       sg_set_page(&sg, pfn_to_page(PFN_DOWN(dma_src)), len,
+                   offset_in_page(dma_src));
+       sg_dma_address(&sg) = dma_src;
+       sg_dma_len(&sg) = len;
+
+       return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_BIDIRECTIONAL,
+                              flags);
+}
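/*
 * [Editor's sketch of a dmaengine client, not part of the driver; the
 * function name and the assumption that src/dst are already DMA-mapped
 * are hypothetical]
 */
static dma_cookie_t example_client_memcpy(struct dma_chan *chan,
					  dma_addr_t dst, dma_addr_t src,
					  size_t len)
{
	struct dma_async_tx_descriptor *tx;

	/* builds the single-entry SG list and chunked descriptor chain */
	tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len,
						  DMA_CTRL_ACK);
	if (!tx)
		return -ENOMEM;

	/* sh_dmae_tx_submit(): assigns a cookie, queues all chunks */
	return tx->tx_submit(tx);
	/* the client then calls dma_async_issue_pending(chan) to start */
}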
+
+static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
+       struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+       enum dma_transfer_direction direction, unsigned long flags,
+       void *context)
+{
+       struct sh_dmae_slave *param;
+       struct sh_dmae_chan *sh_chan;
+       dma_addr_t slave_addr;
+
+       if (!chan)
                return NULL;
 
        sh_chan = to_sh_chan(chan);
+       param = chan->private;
 
-       do {
-               /* Allocate the link descriptor from DMA pool */
-               new = sh_dmae_get_desc(sh_chan);
-               if (!new) {
-                       dev_err(sh_chan->dev,
-                                       "No free memory for link descriptor\n");
-                       goto err_get_desc;
-               }
+       /* Someone calling slave DMA on a public channel? */
+       if (!param || !sg_len) {
+               dev_warn(sh_chan->dev, "%s: bad parameter: %p, %d, %d\n",
+                        __func__, param, sg_len, param ? param->slave_id : -1);
+               return NULL;
+       }
+
+       slave_addr = param->config->addr;
+
+       /*
+        * if (param != NULL), this is a successfully requested slave channel,
+        * therefore param->config != NULL too.
+        */
+       return sh_dmae_prep_sg(sh_chan, sgl, sg_len, &slave_addr,
+                              direction, flags);
+}
 
-               copy_size = min(len, (size_t)SH_DMA_TCR_MAX);
+static int sh_dmae_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+                          unsigned long arg)
+{
+       struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
 
-               new->hw.sar = dma_src;
-               new->hw.dar = dma_dest;
-               new->hw.tcr = copy_size;
-               if (!first)
-                       first = new;
+       /* Only supports DMA_TERMINATE_ALL */
+       if (cmd != DMA_TERMINATE_ALL)
+               return -ENXIO;
 
-               new->mark = DESC_NCOMP;
-               async_tx_ack(&new->async_tx);
+       if (!chan)
+               return -EINVAL;
 
-               prev = new;
-               len -= copy_size;
-               dma_src += copy_size;
-               dma_dest += copy_size;
-               /* Insert the link descriptor to the LD ring */
-               list_add_tail(&new->node, &first->tx_list);
-       } while (len);
+       spin_lock_bh(&sh_chan->desc_lock);
+       dmae_halt(sh_chan);
 
-       new->async_tx.flags = flags; /* client is in control of this ack */
-       new->async_tx.cookie = -EBUSY; /* Last desc */
+       if (!list_empty(&sh_chan->ld_queue)) {
+               /* Record partial transfer */
+               struct sh_desc *desc = list_entry(sh_chan->ld_queue.next,
+                                                 struct sh_desc, node);
+               desc->partial = (desc->hw.tcr - sh_dmae_readl(sh_chan, TCR)) <<
+                       sh_chan->xmit_shift;
 
-       return &first->async_tx;
+       }
+       spin_unlock_bh(&sh_chan->desc_lock);
 
-err_get_desc:
-       sh_dmae_put_desc(sh_chan, first);
-       return NULL;
+       sh_dmae_chan_ld_cleanup(sh_chan, true);
 
+       return 0;
 }
 
-/*
- * sh_chan_ld_cleanup - Clean up link descriptors
- *
- * This function clean up the ld_queue of DMA channel.
- */
-static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan)
+static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all)
 {
        struct sh_desc *desc, *_desc;
+       /* Is the "exposed" head of a chain acked? */
+       bool head_acked = false;
+       dma_cookie_t cookie = 0;
+       dma_async_tx_callback callback = NULL;
+       void *param = NULL;
 
        spin_lock_bh(&sh_chan->desc_lock);
        list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) {
-               dma_async_tx_callback callback;
-               void *callback_param;
-
-               /* non send data */
-               if (desc->mark == DESC_NCOMP)
+               struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+               BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie);
+               BUG_ON(desc->mark != DESC_SUBMITTED &&
+                      desc->mark != DESC_COMPLETED &&
+                      desc->mark != DESC_WAITING);
+
+               /*
+                * queue is ordered, and we use this loop to (1) clean up all
+                * completed descriptors, and to (2) update descriptor flags of
+                * any chunks in a (partially) completed chain
+                */
+               if (!all && desc->mark == DESC_SUBMITTED &&
+                   desc->cookie != cookie)
                        break;
 
-               /* send data sesc */
-               callback = desc->async_tx.callback;
-               callback_param = desc->async_tx.callback_param;
+               if (tx->cookie > 0)
+                       cookie = tx->cookie;
 
-               /* Remove from ld_queue list */
-               list_splice_init(&desc->tx_list, &sh_chan->ld_free);
+               if (desc->mark == DESC_COMPLETED && desc->chunks == 1) {
+                       if (sh_chan->common.completed_cookie != desc->cookie - 1)
+                               dev_dbg(sh_chan->dev,
+                                       "Completing cookie %d, expected %d\n",
+                                       desc->cookie,
+                                       sh_chan->common.completed_cookie + 1);
+                       sh_chan->common.completed_cookie = desc->cookie;
+               }
 
-               dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n",
-                               desc);
+               /* Call callback on the last chunk */
+               if (desc->mark == DESC_COMPLETED && tx->callback) {
+                       desc->mark = DESC_WAITING;
+                       callback = tx->callback;
+                       param = tx->callback_param;
+                       dev_dbg(sh_chan->dev, "descriptor #%d@%p on %d callback\n",
+                               tx->cookie, tx, sh_chan->id);
+                       BUG_ON(desc->chunks != 1);
+                       break;
+               }
 
-               list_move(&desc->node, &sh_chan->ld_free);
-               /* Run the link descriptor callback function */
-               if (callback) {
-                       spin_unlock_bh(&sh_chan->desc_lock);
-                       dev_dbg(sh_chan->dev, "link descriptor %p callback\n",
-                                       desc);
-                       callback(callback_param);
-                       spin_lock_bh(&sh_chan->desc_lock);
+               if (tx->cookie > 0 || tx->cookie == -EBUSY) {
+                       if (desc->mark == DESC_COMPLETED) {
+                               BUG_ON(tx->cookie < 0);
+                               desc->mark = DESC_WAITING;
+                       }
+                       head_acked = async_tx_test_ack(tx);
+               } else {
+                       switch (desc->mark) {
+                       case DESC_COMPLETED:
+                               desc->mark = DESC_WAITING;
+                               /* Fall through */
+                       case DESC_WAITING:
+                               if (head_acked)
+                                       async_tx_ack(&desc->async_tx);
+                       }
+               }
+
+               dev_dbg(sh_chan->dev, "descriptor %p #%d completed.\n",
+                       tx, tx->cookie);
+
+               if (((desc->mark == DESC_COMPLETED ||
+                     desc->mark == DESC_WAITING) &&
+                    async_tx_test_ack(&desc->async_tx)) || all) {
+                       /* Remove from ld_queue list */
+                       desc->mark = DESC_IDLE;
+                       list_move(&desc->node, &sh_chan->ld_free);
                }
        }
+
+       if (all && !callback)
+               /*
+                * Terminating and the loop completed normally: forgive
+                * uncompleted cookies
+                */
+               sh_chan->common.completed_cookie = sh_chan->common.cookie;
+
        spin_unlock_bh(&sh_chan->desc_lock);
+
+       if (callback)
+               callback(param);
+
+       return callback;
+}
+
+/*
+ * sh_chan_ld_cleanup - Clean up link descriptors
+ *
+ * This function cleans up the ld_queue of DMA channel.
+ */
+static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all)
+{
+       while (__ld_cleanup(sh_chan, all))
+               ;
 }
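/*
 * [Editor's note] __ld_cleanup() returns the callback it invoked, if any;
 * since it breaks out of its scan and drops desc_lock to run a callback,
 * a non-NULL return means the queue must be re-scanned from the top, which
 * is what the empty-bodied while loop above does.
 */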
 
 static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
 {
-       struct list_head *ld_node;
-       struct sh_dmae_regs hw;
+       struct sh_desc *desc;
 
+       spin_lock_bh(&sh_chan->desc_lock);
        /* DMA work check */
-       if (dmae_is_idle(sh_chan))
-               return;
-
-       /* Find the first un-transfer desciptor */
-       for (ld_node = sh_chan->ld_queue.next;
-               (ld_node != &sh_chan->ld_queue)
-                       && (to_sh_desc(ld_node)->mark == DESC_COMP);
-               ld_node = ld_node->next)
-               cpu_relax();
-
-       if (ld_node != &sh_chan->ld_queue) {
-               /* Get the ld start address from ld_queue */
-               hw = to_sh_desc(ld_node)->hw;
-               dmae_set_reg(sh_chan, hw);
-               dmae_start(sh_chan);
-       }
+       if (dmae_is_busy(sh_chan))
+               goto sh_chan_xfer_ld_queue_end;
+
+       /* Find the first descriptor that has not been transferred yet */
+       list_for_each_entry(desc, &sh_chan->ld_queue, node)
+               if (desc->mark == DESC_SUBMITTED) {
+                       dev_dbg(sh_chan->dev, "Queue #%d to %d: %u@%x -> %x\n",
+                               desc->async_tx.cookie, sh_chan->id,
+                               desc->hw.tcr, desc->hw.sar, desc->hw.dar);
+                       /* Get the ld start address from ld_queue */
+                       dmae_set_reg(sh_chan, &desc->hw);
+                       dmae_start(sh_chan);
+                       break;
+               }
+
+sh_chan_xfer_ld_queue_end:
+       spin_unlock_bh(&sh_chan->desc_lock);
 }
 
 static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
@@ -412,36 +820,47 @@ static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
        sh_chan_xfer_ld_queue(sh_chan);
 }
 
-static enum dma_status sh_dmae_is_complete(struct dma_chan *chan,
+static enum dma_status sh_dmae_tx_status(struct dma_chan *chan,
                                        dma_cookie_t cookie,
-                                       dma_cookie_t *done,
-                                       dma_cookie_t *used)
+                                       struct dma_tx_state *txstate)
 {
        struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
-       dma_cookie_t last_used;
-       dma_cookie_t last_complete;
+       enum dma_status status;
 
-       sh_dmae_chan_ld_cleanup(sh_chan);
+       sh_dmae_chan_ld_cleanup(sh_chan, false);
 
-       last_used = chan->cookie;
-       last_complete = sh_chan->completed_cookie;
-       if (last_complete == -EBUSY)
-               last_complete = last_used;
+       spin_lock_bh(&sh_chan->desc_lock);
 
-       if (done)
-               *done = last_complete;
+       status = dma_cookie_status(chan, cookie, txstate);
+
+       /*
+        * If we don't find the cookie on the queue, it has been aborted and
+        * we have to report an error
+        */
+       if (status != DMA_SUCCESS) {
+               struct sh_desc *desc;
+               status = DMA_ERROR;
+               list_for_each_entry(desc, &sh_chan->ld_queue, node)
+                       if (desc->cookie == cookie) {
+                               status = DMA_IN_PROGRESS;
+                               break;
+                       }
+       }
 
-       if (used)
-               *used = last_used;
+       spin_unlock_bh(&sh_chan->desc_lock);
 
-       return dma_async_is_complete(cookie, last_complete, last_used);
+       return status;
 }
 
 static irqreturn_t sh_dmae_interrupt(int irq, void *data)
 {
        irqreturn_t ret = IRQ_NONE;
-       struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
-       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+       struct sh_dmae_chan *sh_chan = data;
+       u32 chcr;
+
+       spin_lock(&sh_chan->desc_lock);
+
+       chcr = chcr_read(sh_chan);
 
        if (chcr & CHCR_TE) {
                /* DMA stop */
@@ -451,154 +870,199 @@ static irqreturn_t sh_dmae_interrupt(int irq, void *data)
                tasklet_schedule(&sh_chan->tasklet);
        }
 
+       spin_unlock(&sh_chan->desc_lock);
+
        return ret;
 }
 
-#if defined(CONFIG_CPU_SH4)
-static irqreturn_t sh_dmae_err(int irq, void *data)
+/* Called from error IRQ or NMI */
+static bool sh_dmae_reset(struct sh_dmae_device *shdev)
 {
-       int err = 0;
-       struct sh_dmae_device *shdev = (struct sh_dmae_device *)data;
+       unsigned int handled = 0;
+       int i;
 
-       /* IRQ Multi */
-       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
-               int cnt = 0;
-               switch (irq) {
-#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
-               case DMTE6_IRQ:
-                       cnt++;
-#endif
-               case DMTE0_IRQ:
-                       if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) {
-                               disable_irq(irq);
-                               return IRQ_HANDLED;
-                       }
-               default:
-                       return IRQ_NONE;
-               }
-       } else {
-               /* reset dma controller */
-               err = sh_dmae_rst(0);
-               if (err)
-                       return err;
-               if (shdev->pdata.mode & SHDMA_DMAOR1) {
-                       err = sh_dmae_rst(1);
-                       if (err)
-                               return err;
+       /* halt the dma controller */
+       sh_dmae_ctl_stop(shdev);
+
+       /* We cannot detect which channel caused the error, so reset them all */
+       for (i = 0; i < SH_DMAC_MAX_CHANNELS; i++) {
+               struct sh_dmae_chan *sh_chan = shdev->chan[i];
+               struct sh_desc *desc;
+               LIST_HEAD(dl);
+
+               if (!sh_chan)
+                       continue;
+
+               spin_lock(&sh_chan->desc_lock);
+
+               /* Stop the channel */
+               dmae_halt(sh_chan);
+
+               list_splice_init(&sh_chan->ld_queue, &dl);
+
+               spin_unlock(&sh_chan->desc_lock);
+
+               /* Complete all descriptors on the spliced-off list */
+               list_for_each_entry(desc, &dl, node) {
+                       struct dma_async_tx_descriptor *tx = &desc->async_tx;
+                       desc->mark = DESC_IDLE;
+                       if (tx->callback)
+                               tx->callback(tx->callback_param);
                }
-               disable_irq(irq);
-               return IRQ_HANDLED;
+
+               spin_lock(&sh_chan->desc_lock);
+               list_splice(&dl, &sh_chan->ld_free);
+               spin_unlock(&sh_chan->desc_lock);
+
+               handled++;
        }
+
+       sh_dmae_rst(shdev);
+
+       return !!handled;
+}
+
+static irqreturn_t sh_dmae_err(int irq, void *data)
+{
+       struct sh_dmae_device *shdev = data;
+
+       if (!(dmaor_read(shdev) & DMAOR_AE))
+               return IRQ_NONE;
+
+       sh_dmae_reset(data);
+       return IRQ_HANDLED;
 }
-#endif
 
 static void dmae_do_tasklet(unsigned long data)
 {
        struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
-       struct sh_desc *desc, *_desc, *cur_desc = NULL;
+       struct sh_desc *desc;
        u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
-       list_for_each_entry_safe(desc, _desc,
-                                       &sh_chan->ld_queue, node) {
-               if ((desc->hw.sar + desc->hw.tcr) == sar_buf) {
-                       cur_desc = desc;
+       u32 dar_buf = sh_dmae_readl(sh_chan, DAR);
+
+       spin_lock(&sh_chan->desc_lock);
+       list_for_each_entry(desc, &sh_chan->ld_queue, node) {
+               if (desc->mark == DESC_SUBMITTED &&
+                   ((desc->direction == DMA_FROM_DEVICE &&
+                     (desc->hw.dar + desc->hw.tcr) == dar_buf) ||
+                    (desc->hw.sar + desc->hw.tcr) == sar_buf)) {
+                       dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n",
+                               desc->async_tx.cookie, &desc->async_tx,
+                               desc->hw.dar);
+                       desc->mark = DESC_COMPLETED;
                        break;
                }
        }
+       spin_unlock(&sh_chan->desc_lock);
 
-       if (cur_desc) {
-               switch (cur_desc->async_tx.cookie) {
-               case 0: /* other desc data */
-                       break;
-               case -EBUSY: /* last desc */
-               sh_chan->completed_cookie =
-                               cur_desc->async_tx.cookie;
-                       break;
-               default: /* first desc ( 0 < )*/
-                       sh_chan->completed_cookie =
-                               cur_desc->async_tx.cookie - 1;
-                       break;
-               }
-               cur_desc->mark = DESC_COMP;
-       }
        /* Next desc */
        sh_chan_xfer_ld_queue(sh_chan);
-       sh_dmae_chan_ld_cleanup(sh_chan);
+       sh_dmae_chan_ld_cleanup(sh_chan, false);
+}
+
+static bool sh_dmae_nmi_notify(struct sh_dmae_device *shdev)
+{
+       /* Fast path out if NMIF is not asserted for this controller */
+       if ((dmaor_read(shdev) & DMAOR_NMIF) == 0)
+               return false;
+
+       return sh_dmae_reset(shdev);
 }
 
-static unsigned int get_dmae_irq(unsigned int id)
+static int sh_dmae_nmi_handler(struct notifier_block *self,
+                              unsigned long cmd, void *data)
 {
-       unsigned int irq = 0;
-       if (id < ARRAY_SIZE(dmte_irq_map))
-               irq = dmte_irq_map[id];
-       return irq;
+       struct sh_dmae_device *shdev;
+       int ret = NOTIFY_DONE;
+       bool triggered;
+
+       /*
+        * Only concern ourselves with NMI events.
+        *
+        * Normally we would check the die chain value, but as this needs
+        * to be architecture independent, check for NMI context instead.
+        */
+       if (!in_nmi())
+               return NOTIFY_DONE;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(shdev, &sh_dmae_devices, node) {
+               /*
+                * Only stop if one of the controllers has NMIF asserted,
+                * we do not want to interfere with regular address error
+                * handling or NMI events that don't concern the DMACs.
+                */
+               triggered = sh_dmae_nmi_notify(shdev);
+               if (triggered == true)
+                       ret = NOTIFY_OK;
+       }
+       rcu_read_unlock();
+
+       return ret;
 }
 
-static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id)
+static struct notifier_block sh_dmae_nmi_notifier __read_mostly = {
+       .notifier_call  = sh_dmae_nmi_handler,
+
+       /* Run before NMI debug handler and KGDB */
+       .priority       = 1,
+};
+
+static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id,
+                                       int irq, unsigned long flags)
 {
        int err;
-       unsigned int irq = get_dmae_irq(id);
-       unsigned long irqflags = IRQF_DISABLED;
+       const struct sh_dmae_channel *chan_pdata = &shdev->pdata->channel[id];
+       struct platform_device *pdev = to_platform_device(shdev->common.dev);
        struct sh_dmae_chan *new_sh_chan;
 
        /* alloc channel */
        new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
        if (!new_sh_chan) {
-               dev_err(shdev->common.dev, "No free memory for allocating "
-                               "dma channels!\n");
+               dev_err(shdev->common.dev,
+                       "No free memory for allocating dma channels!\n");
                return -ENOMEM;
        }
 
+       /* copy struct dma_device */
+       new_sh_chan->common.device = &shdev->common;
+
        new_sh_chan->dev = shdev->common.dev;
        new_sh_chan->id = id;
+       new_sh_chan->irq = irq;
+       new_sh_chan->base = shdev->chan_reg + chan_pdata->offset / sizeof(u32);
 
        /* Init DMA tasklet */
        tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet,
                        (unsigned long)new_sh_chan);
 
-       /* Init the channel */
-       dmae_init(new_sh_chan);
-
        spin_lock_init(&new_sh_chan->desc_lock);
 
        /* Init descriptor management lists */
        INIT_LIST_HEAD(&new_sh_chan->ld_queue);
        INIT_LIST_HEAD(&new_sh_chan->ld_free);
 
-       /* copy struct dma_device */
-       new_sh_chan->common.device = &shdev->common;
-
        /* Add the channel to DMA device channel list */
        list_add_tail(&new_sh_chan->common.device_node,
                        &shdev->common.channels);
        shdev->common.chancnt++;
 
-       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
-               irqflags = IRQF_SHARED;
-#if defined(DMTE6_IRQ)
-               if (irq >= DMTE6_IRQ)
-                       irq = DMTE6_IRQ;
-               else
-#endif
-                       irq = DMTE0_IRQ;
-       }
-
-       snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
-                       "sh-dmae%d", new_sh_chan->id);
+       if (pdev->id >= 0)
+               snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
+                        "sh-dmae%d.%d", pdev->id, new_sh_chan->id);
+       else
+               snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
+                        "sh-dma%d", new_sh_chan->id);
 
        /* set up channel irq */
-       err = request_irq(irq, &sh_dmae_interrupt,
-               irqflags, new_sh_chan->dev_id, new_sh_chan);
+       err = request_irq(irq, &sh_dmae_interrupt, flags,
+                         new_sh_chan->dev_id, new_sh_chan);
        if (err) {
                dev_err(shdev->common.dev, "DMA channel %d request_irq error "
                        "with return %d\n", id, err);
                goto err_no_irq;
        }
 
-       /* CHCR register control function */
-       new_sh_chan->set_chcr = dmae_set_chcr;
-       /* DMARS register control function */
-       new_sh_chan->set_dmars = dmae_set_dmars;
-
        shdev->chan[id] = new_sh_chan;
        return 0;
 
@@ -615,12 +1079,12 @@ static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
 
        for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) {
                if (shdev->chan[i]) {
-                       struct sh_dmae_chan *shchan = shdev->chan[i];
-                       if (!(shdev->pdata.mode & SHDMA_MIX_IRQ))
-                               free_irq(dmte_irq_map[i], shchan);
+                       struct sh_dmae_chan *sh_chan = shdev->chan[i];
+
+                       free_irq(sh_chan->irq, sh_chan);
 
-                       list_del(&shchan->common.device_node);
-                       kfree(shchan);
+                       list_del(&sh_chan->common.device_node);
+                       kfree(sh_chan);
                        shdev->chan[i] = NULL;
                }
        }
@@ -629,85 +1093,196 @@ static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
 
 static int __init sh_dmae_probe(struct platform_device *pdev)
 {
-       int err = 0, cnt, ecnt;
-       unsigned long irqflags = IRQF_DISABLED;
-#if defined(CONFIG_CPU_SH4)
-       int eirq[] = { DMAE0_IRQ,
-#if defined(DMAE1_IRQ)
-                       DMAE1_IRQ
-#endif
-               };
-#endif
+       struct sh_dmae_pdata *pdata = pdev->dev.platform_data;
+       unsigned long irqflags = IRQF_DISABLED,
+               chan_flag[SH_DMAC_MAX_CHANNELS] = {};
+       int errirq, chan_irq[SH_DMAC_MAX_CHANNELS];
+       int err, i, irq_cnt = 0, irqres = 0, irq_cap = 0;
        struct sh_dmae_device *shdev;
+       struct resource *chan, *dmars, *errirq_res, *chanirq_res;
 
+       /* get platform data */
+       if (!pdata || !pdata->channel_num)
+               return -ENODEV;
+
+       chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       /* DMARS area is optional */
+       dmars = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       /*
+        * IRQ resources:
+        * 1. there always must be at least one IRQ IO-resource. On SH4 it is
+        *    the error IRQ, in which case it is the only IRQ in this resource:
+        *    start == end. If it is the only IRQ resource, all channels also
+        *    use the same IRQ.
+        * 2. DMA channel IRQ resources can be specified one per resource or in
+        *    ranges (start != end)
+        * 3. iff all events (channels and, optionally, error) on this
+        *    controller use the same IRQ, only one IRQ resource can be
+        *    specified, otherwise there must be one IRQ per channel, even if
+        *    some of them are equal
+        * 4. if all IRQs on this controller are equal or if some specific IRQs
+        *    specify IORESOURCE_IRQ_SHAREABLE in their resources, they will be
+        *    requested with the IRQF_SHARED flag
+        */
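/*
 * [Editor's illustration: hypothetical board code, all values invented]
 * Case 3 above, the fully multiplexed variant, needs only three resources:
 *
 *   static struct resource dmac_resources[] = {
 *           channel registers:
 *           { .start = 0xfe008020, .end = 0xfe00808f,
 *             .flags = IORESOURCE_MEM },
 *           DMARS (optional):
 *           { .start = 0xfe009000, .end = 0xfe00900b,
 *             .flags = IORESOURCE_MEM },
 *           one IRQ shared by the error event and all channels:
 *           { .start = 34, .end = 34,
 *             .flags = IORESOURCE_IRQ },
 *   };
 *
 * Per-channel IRQs would instead add one IORESOURCE_IRQ entry (or a range,
 * start != end) per channel after the error IRQ.
 */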
+       errirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+       if (!chan || !errirq_res)
+               return -ENODEV;
+
+       if (!request_mem_region(chan->start, resource_size(chan), pdev->name)) {
+               dev_err(&pdev->dev, "DMAC register region already claimed\n");
+               return -EBUSY;
+       }
+
+       if (dmars && !request_mem_region(dmars->start, resource_size(dmars), pdev->name)) {
+               dev_err(&pdev->dev, "DMAC DMARS region already claimed\n");
+               err = -EBUSY;
+               goto ermrdmars;
+       }
+
+       err = -ENOMEM;
        shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
        if (!shdev) {
-               dev_err(&pdev->dev, "No enough memory\n");
-               err = -ENOMEM;
-               goto shdev_err;
+               dev_err(&pdev->dev, "Not enough memory\n");
+               goto ealloc;
        }
 
-       /* get platform data */
-       if (!pdev->dev.platform_data)
-               goto shdev_err;
+       shdev->chan_reg = ioremap(chan->start, resource_size(chan));
+       if (!shdev->chan_reg)
+               goto emapchan;
+       if (dmars) {
+               shdev->dmars = ioremap(dmars->start, resource_size(dmars));
+               if (!shdev->dmars)
+                       goto emapdmars;
+       }
 
        /* platform data */
-       memcpy(&shdev->pdata, pdev->dev.platform_data,
-                       sizeof(struct sh_dmae_pdata));
+       shdev->pdata = pdata;
 
-       /* reset dma controller */
-       err = sh_dmae_rst(0);
+       if (pdata->chcr_offset)
+               shdev->chcr_offset = pdata->chcr_offset;
+       else
+               shdev->chcr_offset = CHCR;
+
+       if (pdata->chcr_ie_bit)
+               shdev->chcr_ie_bit = pdata->chcr_ie_bit;
+       else
+               shdev->chcr_ie_bit = CHCR_IE;
+
+       platform_set_drvdata(pdev, shdev);
+
+       pm_runtime_enable(&pdev->dev);
+       pm_runtime_get_sync(&pdev->dev);
+
+       spin_lock_irq(&sh_dmae_lock);
+       list_add_tail_rcu(&shdev->node, &sh_dmae_devices);
+       spin_unlock_irq(&sh_dmae_lock);
+
+       /* reset dma controller - only needed as a test */
+       err = sh_dmae_rst(shdev);
        if (err)
                goto rst_err;
 
-       /* SH7780/85/23 has DMAOR1 */
-       if (shdev->pdata.mode & SHDMA_DMAOR1) {
-               err = sh_dmae_rst(1);
-               if (err)
-                       goto rst_err;
-       }
-
        INIT_LIST_HEAD(&shdev->common.channels);
 
        dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
+       if (pdata->slave && pdata->slave_num)
+               dma_cap_set(DMA_SLAVE, shdev->common.cap_mask);
+
        shdev->common.device_alloc_chan_resources
                = sh_dmae_alloc_chan_resources;
        shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources;
        shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy;
-       shdev->common.device_is_tx_complete = sh_dmae_is_complete;
+       shdev->common.device_tx_status = sh_dmae_tx_status;
        shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending;
+
+       /* Compulsory for DMA_SLAVE fields */
+       shdev->common.device_prep_slave_sg = sh_dmae_prep_slave_sg;
+       shdev->common.device_control = sh_dmae_control;
+
        shdev->common.dev = &pdev->dev;
+       /* The default 4-byte transfer size requires 4-byte aligned buffers */
+       shdev->common.copy_align = LOG2_DEFAULT_XFER_SIZE;
+
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
+       chanirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
+
+       if (!chanirq_res)
+               chanirq_res = errirq_res;
+       else
+               irqres++;
 
-#if defined(CONFIG_CPU_SH4)
-       /* Non Mix IRQ mode SH7722/SH7730 etc... */
-       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+       if (chanirq_res == errirq_res ||
+           (errirq_res->flags & IORESOURCE_BITS) == IORESOURCE_IRQ_SHAREABLE)
                irqflags = IRQF_SHARED;
-               eirq[0] = DMTE0_IRQ;
-#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
-               eirq[1] = DMTE6_IRQ;
-#endif
+
+       errirq = errirq_res->start;
+
+       err = request_irq(errirq, sh_dmae_err, irqflags,
+                         "DMAC Address Error", shdev);
+       if (err) {
+               dev_err(&pdev->dev,
+                       "DMA failed requesting irq #%d, error %d\n",
+                       errirq, err);
+               goto eirq_err;
        }
 
-       for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) {
-               err = request_irq(eirq[ecnt], sh_dmae_err,
-                       irqflags, "DMAC Address Error", shdev);
-               if (err) {
-                       dev_err(&pdev->dev, "DMA device request_irq"
-                               "error (irq %d) with return %d\n",
-                               eirq[ecnt], err);
-                       goto eirq_err;
+#else
+       chanirq_res = errirq_res;
+#endif /* CONFIG_CPU_SH4 || CONFIG_ARCH_SHMOBILE */
+
+       if (chanirq_res->start == chanirq_res->end &&
+           !platform_get_resource(pdev, IORESOURCE_IRQ, 1)) {
+               /* Special case - all multiplexed */
+               for (; irq_cnt < pdata->channel_num; irq_cnt++) {
+                       if (irq_cnt < SH_DMAC_MAX_CHANNELS) {
+                               chan_irq[irq_cnt] = chanirq_res->start;
+                               chan_flag[irq_cnt] = IRQF_SHARED;
+                       } else {
+                               irq_cap = 1;
+                               break;
+                       }
                }
+       } else {
+               do {
+                       for (i = chanirq_res->start; i <= chanirq_res->end; i++) {
+                               if (irq_cnt >= SH_DMAC_MAX_CHANNELS) {
+                                       irq_cap = 1;
+                                       break;
+                               }
+
+                               if ((errirq_res->flags & IORESOURCE_BITS) ==
+                                   IORESOURCE_IRQ_SHAREABLE)
+                                       chan_flag[irq_cnt] = IRQF_SHARED;
+                               else
+                                       chan_flag[irq_cnt] = IRQF_DISABLED;
+                               dev_dbg(&pdev->dev,
+                                       "Found IRQ %d for channel %d\n",
+                                       i, irq_cnt);
+                               chan_irq[irq_cnt++] = i;
+                       }
+
+                       if (irq_cnt >= SH_DMAC_MAX_CHANNELS)
+                               break;
+
+                       chanirq_res = platform_get_resource(pdev,
+                                               IORESOURCE_IRQ, ++irqres);
+               } while (irq_cnt < pdata->channel_num && chanirq_res);
        }
-#endif /* CONFIG_CPU_SH4 */
 
        /* Create DMA Channel */
-       for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) {
-               err = sh_dmae_chan_probe(shdev, cnt);
+       for (i = 0; i < irq_cnt; i++) {
+               err = sh_dmae_chan_probe(shdev, i, chan_irq[i], chan_flag[i]);
                if (err)
                        goto chan_probe_err;
        }
 
-       platform_set_drvdata(pdev, shdev);
+       if (irq_cap)
+               dev_notice(&pdev->dev, "Attempting to register %d DMA "
+                          "channels when a maximum of %d are supported.\n",
+                          pdata->channel_num, SH_DMAC_MAX_CHANNELS);
+
+       pm_runtime_put(&pdev->dev);
+
        dma_async_device_register(&shdev->common);
 
        return err;
@@ -715,62 +1290,163 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
 chan_probe_err:
        sh_dmae_chan_remove(shdev);
 
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
+       free_irq(errirq, shdev);
 eirq_err:
-       for (ecnt-- ; ecnt >= 0; ecnt--)
-               free_irq(eirq[ecnt], shdev);
-
+#endif
 rst_err:
+       spin_lock_irq(&sh_dmae_lock);
+       list_del_rcu(&shdev->node);
+       spin_unlock_irq(&sh_dmae_lock);
+
+       pm_runtime_put(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
+
+       if (dmars)
+               iounmap(shdev->dmars);
+
+       platform_set_drvdata(pdev, NULL);
+emapdmars:
+       iounmap(shdev->chan_reg);
+       synchronize_rcu();
+emapchan:
        kfree(shdev);
+ealloc:
+       if (dmars)
+               release_mem_region(dmars->start, resource_size(dmars));
+ermrdmars:
+       release_mem_region(chan->start, resource_size(chan));
 
-shdev_err:
        return err;
 }
 
 static int __exit sh_dmae_remove(struct platform_device *pdev)
 {
        struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+       struct resource *res;
+       int errirq = platform_get_irq(pdev, 0);
 
        dma_async_device_unregister(&shdev->common);
 
-       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
-               free_irq(DMTE0_IRQ, shdev);
-#if defined(DMTE6_IRQ)
-               free_irq(DMTE6_IRQ, shdev);
-#endif
-       }
+       if (errirq > 0)
+               free_irq(errirq, shdev);
+
+       spin_lock_irq(&sh_dmae_lock);
+       list_del_rcu(&shdev->node);
+       spin_unlock_irq(&sh_dmae_lock);
 
        /* channel data remove */
        sh_dmae_chan_remove(shdev);
 
-       if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) {
-               free_irq(DMAE0_IRQ, shdev);
-#if defined(DMAE1_IRQ)
-               free_irq(DMAE1_IRQ, shdev);
-#endif
-       }
+       pm_runtime_disable(&pdev->dev);
+
+       if (shdev->dmars)
+               iounmap(shdev->dmars);
+       iounmap(shdev->chan_reg);
+
+       platform_set_drvdata(pdev, NULL);
+
+       synchronize_rcu();
        kfree(shdev);
 
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (res)
+               release_mem_region(res->start, resource_size(res));
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       if (res)
+               release_mem_region(res->start, resource_size(res));
+
        return 0;
 }
 
 static void sh_dmae_shutdown(struct platform_device *pdev)
 {
        struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
-       sh_dmae_ctl_stop(0);
-       if (shdev->pdata.mode & SHDMA_DMAOR1)
-               sh_dmae_ctl_stop(1);
+       sh_dmae_ctl_stop(shdev);
+}
+
+static int sh_dmae_runtime_suspend(struct device *dev)
+{
+       return 0;
+}
+
+static int sh_dmae_runtime_resume(struct device *dev)
+{
+       struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+
+       return sh_dmae_rst(shdev);
+}
+
+#ifdef CONFIG_PM
+static int sh_dmae_suspend(struct device *dev)
+{
+       struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+       int i;
+
+       for (i = 0; i < shdev->pdata->channel_num; i++) {
+               struct sh_dmae_chan *sh_chan = shdev->chan[i];
+               if (sh_chan->descs_allocated)
+                       sh_chan->pm_error = pm_runtime_put_sync(dev);
+       }
+
+       return 0;
 }
 
+static int sh_dmae_resume(struct device *dev)
+{
+       struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+       int i;
+
+       for (i = 0; i < shdev->pdata->channel_num; i++) {
+               struct sh_dmae_chan *sh_chan = shdev->chan[i];
+               struct sh_dmae_slave *param = sh_chan->common.private;
+
+               if (!sh_chan->descs_allocated)
+                       continue;
+
+               if (!sh_chan->pm_error)
+                       pm_runtime_get_sync(dev);
+
+               if (param) {
+                       const struct sh_dmae_slave_config *cfg = param->config;
+                       dmae_set_dmars(sh_chan, cfg->mid_rid);
+                       dmae_set_chcr(sh_chan, cfg->chcr);
+               } else {
+                       dmae_init(sh_chan);
+               }
+       }
+
+       return 0;
+}
+#else
+#define sh_dmae_suspend NULL
+#define sh_dmae_resume NULL
+#endif
+
+const struct dev_pm_ops sh_dmae_pm = {
+       .suspend                = sh_dmae_suspend,
+       .resume                 = sh_dmae_resume,
+       .runtime_suspend        = sh_dmae_runtime_suspend,
+       .runtime_resume         = sh_dmae_runtime_resume,
+};
+
 static struct platform_driver sh_dmae_driver = {
        .remove         = __exit_p(sh_dmae_remove),
        .shutdown       = sh_dmae_shutdown,
        .driver = {
+               .owner  = THIS_MODULE,
                .name   = "sh-dma-engine",
+               .pm     = &sh_dmae_pm,
        },
 };
 
 static int __init sh_dmae_init(void)
 {
+       /* Wire up NMI handling */
+       int err = register_die_notifier(&sh_dmae_nmi_notifier);
+       if (err)
+               return err;
+
        return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
 }
 module_init(sh_dmae_init);
@@ -778,9 +1454,12 @@ module_init(sh_dmae_init);
 static void __exit sh_dmae_exit(void)
 {
        platform_driver_unregister(&sh_dmae_driver);
+
+       unregister_die_notifier(&sh_dmae_nmi_notifier);
 }
 module_exit(sh_dmae_exit);
 
 MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
 MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:sh-dma-engine");