Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx
author		Linus Torvalds <torvalds@linux-foundation.org>
		Wed, 23 Jul 2008 19:03:18 +0000 (12:03 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
		Wed, 23 Jul 2008 19:03:18 +0000 (12:03 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx: (24 commits)
  I/OAT: I/OAT version 3.0 support
  I/OAT: tcp_dma_copybreak default value dependent on I/OAT version
  I/OAT: Add watchdog/reset functionality to ioatdma
  iop_adma: cleanup iop_chan_xor_slot_count
  iop_adma: document how to calculate the minimum descriptor pool size
  iop_adma: directly reclaim descriptors on allocation failure
  async_tx: make async_tx_test_ack a boolean routine
  async_tx: remove depend_tx from async_tx_sync_epilog
  async_tx: export async_tx_quiesce
  async_tx: fix handling of the "out of descriptor" condition in async_xor
  async_tx: ensure the xor destination buffer remains dma-mapped
  async_tx: list_for_each_entry_rcu() cleanup
  dmaengine: Driver for the Synopsys DesignWare DMA controller
  dmaengine: Add slave DMA interface
  dmaengine: add DMA_COMPL_SKIP_{SRC,DEST}_UNMAP flags to control dma unmap
  dmaengine: Add dma_client parameter to device_alloc_chan_resources
  dmatest: Simple DMA memcpy test client
  dmaengine: DMA engine driver for Marvell XOR engine
  iop-adma: fix platform driver hotplug/coldplug
  dmaengine: track the number of clients using a channel
  ...

Fixed up conflict in drivers/dca/dca-sysfs.c manually

33 files changed:
arch/avr32/mach-at32ap/at32ap700x.c
crypto/async_tx/async_memcpy.c
crypto/async_tx/async_memset.c
crypto/async_tx/async_tx.c
crypto/async_tx/async_xor.c
drivers/dca/dca-core.c
drivers/dca/dca-sysfs.c
drivers/dma/Kconfig
drivers/dma/Makefile
drivers/dma/dmaengine.c
drivers/dma/dmatest.c [new file with mode: 0644]
drivers/dma/dw_dmac.c [new file with mode: 0644]
drivers/dma/dw_dmac_regs.h [new file with mode: 0644]
drivers/dma/fsldma.c
drivers/dma/ioat.c
drivers/dma/ioat_dca.c
drivers/dma/ioat_dma.c
drivers/dma/ioatdma.h
drivers/dma/ioatdma_hw.h
drivers/dma/ioatdma_registers.h
drivers/dma/iop-adma.c
drivers/dma/mv_xor.c [new file with mode: 0644]
drivers/dma/mv_xor.h [new file with mode: 0644]
include/asm-arm/arch-iop13xx/adma.h
include/asm-arm/hardware/iop3xx-adma.h
include/asm-arm/plat-orion/mv_xor.h [new file with mode: 0644]
include/asm-avr32/arch-at32ap/at32ap700x.h
include/linux/async_tx.h
include/linux/dca.h
include/linux/dmaengine.h
include/linux/dw_dmac.h [new file with mode: 0644]
include/linux/pci_ids.h
net/core/user_dma.c

diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 021d5121718469387fc1c4d95dd797a12f40bb3b..604f44f5dd164833a9bfdb075a7d65a713617fff 100644
@@ -7,6 +7,7 @@
  */
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/dw_dmac.h>
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
@@ -594,6 +595,17 @@ static void __init genclk_init_parent(struct clk *clk)
        clk->parent = parent;
 }
 
+static struct dw_dma_platform_data dw_dmac0_data = {
+       .nr_channels    = 3,
+};
+
+static struct resource dw_dmac0_resource[] = {
+       PBMEM(0xff200000),
+       IRQ(2),
+};
+DEFINE_DEV_DATA(dw_dmac, 0);
+DEV_CLK(hclk, dw_dmac0, hsb, 10);
+
 /* --------------------------------------------------------------------
  *  System peripherals
  * -------------------------------------------------------------------- */
@@ -708,17 +720,6 @@ static struct clk pico_clk = {
        .users          = 1,
 };
 
-static struct resource dmaca0_resource[] = {
-       {
-               .start  = 0xff200000,
-               .end    = 0xff20ffff,
-               .flags  = IORESOURCE_MEM,
-       },
-       IRQ(2),
-};
-DEFINE_DEV(dmaca, 0);
-DEV_CLK(hclk, dmaca0, hsb, 10);
-
 /* --------------------------------------------------------------------
  * HMATRIX
  * -------------------------------------------------------------------- */
@@ -831,7 +832,7 @@ void __init at32_add_system_devices(void)
        platform_device_register(&at32_eic0_device);
        platform_device_register(&smc0_device);
        platform_device_register(&pdc_device);
-       platform_device_register(&dmaca0_device);
+       platform_device_register(&dw_dmac0_device);
 
        platform_device_register(&at32_tcb0_device);
        platform_device_register(&at32_tcb1_device);
@@ -2032,7 +2033,7 @@ struct clk *at32_clock_list[] = {
        &smc0_mck,
        &pdc_hclk,
        &pdc_pclk,
-       &dmaca0_hclk,
+       &dw_dmac0_hclk,
        &pico_clk,
        &pio0_mck,
        &pio1_mck,
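
The hunks above retire the generic "dmaca" platform device in favour of a dw_dmac device that carries its channel count in struct dw_dma_platform_data. As a rough, hypothetical sketch of how such platform data is consumed (this is not the dw_dmac probe from this merge, and all example_* names are invented):

#include <linux/dw_dmac.h>
#include <linux/platform_device.h>

static int example_dw_probe(struct platform_device *pdev)
{
        struct dw_dma_platform_data *pdata = pdev->dev.platform_data;
        struct resource *io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        int irq = platform_get_irq(pdev, 0);

        if (!pdata || !io || irq < 0)
                return -EINVAL;

        /* map 'io', request 'irq', then set up pdata->nr_channels channels */
        return 0;
}
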
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index a5eda80e84277806b4cbb49c32d55dad233df381..ddccfb01c416b9a636324ee273244b147c644a16 100644
@@ -73,15 +73,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
                pr_debug("%s: (sync) len: %zu\n", __func__, len);
 
                /* wait for any prerequisite operations */
-               if (depend_tx) {
-                       /* if ack is already set then we cannot be sure
-                        * we are referring to the correct operation
-                        */
-                       BUG_ON(async_tx_test_ack(depend_tx));
-                       if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
-                               panic("%s: DMA_ERROR waiting for depend_tx\n",
-                                       __func__);
-               }
+               async_tx_quiesce(&depend_tx);
 
                dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
                src_buf = kmap_atomic(src, KM_USER1) + src_offset;
@@ -91,7 +83,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
                kunmap_atomic(dest_buf, KM_USER0);
                kunmap_atomic(src_buf, KM_USER1);
 
-               async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
+               async_tx_sync_epilog(cb_fn, cb_param);
        }
 
        return tx;
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index f5ff3906b035d52d5c6b28949ca6470b46d60f88..5b5eb99bb244311bb1456c62080249d812967be4 100644
@@ -72,19 +72,11 @@ async_memset(struct page *dest, int val, unsigned int offset,
                dest_buf = (void *) (((char *) page_address(dest)) + offset);
 
                /* wait for any prerequisite operations */
-               if (depend_tx) {
-                       /* if ack is already set then we cannot be sure
-                        * we are referring to the correct operation
-                        */
-                       BUG_ON(depend_tx->ack);
-                       if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
-                               panic("%s: DMA_ERROR waiting for depend_tx\n",
-                                       __func__);
-               }
+               async_tx_quiesce(&depend_tx);
 
                memset(dest_buf, val, len);
 
-               async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
+               async_tx_sync_epilog(cb_fn, cb_param);
        }
 
        return tx;
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 095c798d31700746d8bd6769e3f5f8d015d7e264..85eaf7b1c53153ef07bdc33efa3a9d9c6cdff839 100644
@@ -295,7 +295,7 @@ dma_channel_add_remove(struct dma_client *client,
        case DMA_RESOURCE_REMOVED:
                found = 0;
                spin_lock_irqsave(&async_tx_lock, flags);
-               list_for_each_entry_rcu(ref, &async_tx_master_list, node)
+               list_for_each_entry(ref, &async_tx_master_list, node)
                        if (ref->chan == chan) {
                                /* permit backing devices to go away */
                                dma_chan_put(ref->chan);
@@ -608,23 +608,34 @@ async_trigger_callback(enum async_tx_flags flags,
                pr_debug("%s: (sync)\n", __func__);
 
                /* wait for any prerequisite operations */
-               if (depend_tx) {
-                       /* if ack is already set then we cannot be sure
-                        * we are referring to the correct operation
-                        */
-                       BUG_ON(async_tx_test_ack(depend_tx));
-                       if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
-                               panic("%s: DMA_ERROR waiting for depend_tx\n",
-                                       __func__);
-               }
+               async_tx_quiesce(&depend_tx);
 
-               async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
+               async_tx_sync_epilog(cb_fn, cb_param);
        }
 
        return tx;
 }
 EXPORT_SYMBOL_GPL(async_trigger_callback);
 
+/**
+ * async_tx_quiesce - ensure tx is complete and freeable upon return
+ * @tx - transaction to quiesce
+ */
+void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
+{
+       if (*tx) {
+               /* if ack is already set then we cannot be sure
+                * we are referring to the correct operation
+                */
+               BUG_ON(async_tx_test_ack(*tx));
+               if (dma_wait_for_async_tx(*tx) == DMA_ERROR)
+                       panic("DMA_ERROR waiting for transaction\n");
+               async_tx_ack(*tx);
+               *tx = NULL;
+       }
+}
+EXPORT_SYMBOL_GPL(async_tx_quiesce);
+
 module_init(async_tx_init);
 module_exit(async_tx_exit);
 
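
The newly exported async_tx_quiesce() above replaces the open-coded depend_tx wait (BUG_ON on an already acked descriptor, dma_wait_for_async_tx(), panic on DMA_ERROR) that async_memcpy(), async_memset() and async_trigger_callback() used to duplicate. A minimal hedged sketch of a synchronous fallback path using the helper (example_sync_fallback() is hypothetical, not from this merge):

#include <linux/async_tx.h>
#include <linux/string.h>

static void example_sync_fallback(struct dma_async_tx_descriptor *depend_tx,
                                  dma_async_tx_callback cb_fn, void *cb_param,
                                  void *dest, const void *src, size_t len)
{
        /* wait for, and ack, any prerequisite operation */
        async_tx_quiesce(&depend_tx);

        /* perform the operation on the CPU */
        memcpy(dest, src, len);

        /* run the callback via the new two-argument epilog */
        async_tx_sync_epilog(cb_fn, cb_param);
}
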
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 3a0dddca5a1097e473d103a15797547da66e7bf1..65974c6d3d7a78dbc40e9437b416d5990e04e9a2 100644
  *     when CONFIG_DMA_ENGINE=n
  */
 static __always_inline struct dma_async_tx_descriptor *
-do_async_xor(struct dma_device *device,
-       struct dma_chan *chan, struct page *dest, struct page **src_list,
-       unsigned int offset, unsigned int src_cnt, size_t len,
-       enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
+            unsigned int offset, int src_cnt, size_t len,
+            enum async_tx_flags flags,
+            struct dma_async_tx_descriptor *depend_tx,
+            dma_async_tx_callback cb_fn, void *cb_param)
 {
-       dma_addr_t dma_dest;
+       struct dma_device *dma = chan->device;
        dma_addr_t *dma_src = (dma_addr_t *) src_list;
-       struct dma_async_tx_descriptor *tx;
+       struct dma_async_tx_descriptor *tx = NULL;
+       int src_off = 0;
        int i;
-       unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
-
-       pr_debug("%s: len: %zu\n", __func__, len);
-
-       dma_dest = dma_map_page(device->dev, dest, offset, len,
-                               DMA_FROM_DEVICE);
+       dma_async_tx_callback _cb_fn;
+       void *_cb_param;
+       enum async_tx_flags async_flags;
+       enum dma_ctrl_flags dma_flags;
+       int xor_src_cnt;
+       dma_addr_t dma_dest;
 
+       dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_FROM_DEVICE);
        for (i = 0; i < src_cnt; i++)
-               dma_src[i] = dma_map_page(device->dev, src_list[i], offset,
+               dma_src[i] = dma_map_page(dma->dev, src_list[i], offset,
                                          len, DMA_TO_DEVICE);
 
-       /* Since we have clobbered the src_list we are committed
-        * to doing this asynchronously.  Drivers force forward progress
-        * in case they can not provide a descriptor
-        */
-       tx = device->device_prep_dma_xor(chan, dma_dest, dma_src, src_cnt, len,
-                                        dma_prep_flags);
-       if (!tx) {
-               if (depend_tx)
-                       dma_wait_for_async_tx(depend_tx);
-
-               while (!tx)
-                       tx = device->device_prep_dma_xor(chan, dma_dest,
-                                                        dma_src, src_cnt, len,
-                                                        dma_prep_flags);
-       }
+       while (src_cnt) {
+               async_flags = flags;
+               dma_flags = 0;
+               xor_src_cnt = min(src_cnt, dma->max_xor);
+               /* if we are submitting additional xors, leave the chain open,
+                * clear the callback parameters, and leave the destination
+                * buffer mapped
+                */
+               if (src_cnt > xor_src_cnt) {
+                       async_flags &= ~ASYNC_TX_ACK;
+                       dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
+                       _cb_fn = NULL;
+                       _cb_param = NULL;
+               } else {
+                       _cb_fn = cb_fn;
+                       _cb_param = cb_param;
+               }
+               if (_cb_fn)
+                       dma_flags |= DMA_PREP_INTERRUPT;
 
-       async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+               /* Since we have clobbered the src_list we are committed
+                * to doing this asynchronously.  Drivers force forward progress
+                * in case they can not provide a descriptor
+                */
+               tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off],
+                                             xor_src_cnt, len, dma_flags);
+
+               if (unlikely(!tx))
+                       async_tx_quiesce(&depend_tx);
+
+               /* spin wait for the preceding transactions to complete */
+               while (unlikely(!tx)) {
+                       dma_async_issue_pending(chan);
+                       tx = dma->device_prep_dma_xor(chan, dma_dest,
+                                                     &dma_src[src_off],
+                                                     xor_src_cnt, len,
+                                                     dma_flags);
+               }
+
+               async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn,
+                               _cb_param);
+
+               depend_tx = tx;
+               flags |= ASYNC_TX_DEP_ACK;
+
+               if (src_cnt > xor_src_cnt) {
+                       /* drop completed sources */
+                       src_cnt -= xor_src_cnt;
+                       src_off += xor_src_cnt;
+
+                       /* use the intermediate result as a source */
+                       dma_src[--src_off] = dma_dest;
+                       src_cnt++;
+               } else
+                       break;
+       }
 
        return tx;
 }
 
 static void
 do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
-       unsigned int src_cnt, size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+           int src_cnt, size_t len, enum async_tx_flags flags,
+           dma_async_tx_callback cb_fn, void *cb_param)
 {
-       void *_dest;
        int i;
-
-       pr_debug("%s: len: %zu\n", __func__, len);
+       int xor_src_cnt;
+       int src_off = 0;
+       void *dest_buf;
+       void **srcs = (void **) src_list;
 
        /* reuse the 'src_list' array to convert to buffer pointers */
        for (i = 0; i < src_cnt; i++)
-               src_list[i] = (struct page *)
-                       (page_address(src_list[i]) + offset);
+               srcs[i] = page_address(src_list[i]) + offset;
 
        /* set destination address */
-       _dest = page_address(dest) + offset;
+       dest_buf = page_address(dest) + offset;
 
        if (flags & ASYNC_TX_XOR_ZERO_DST)
-               memset(_dest, 0, len);
+               memset(dest_buf, 0, len);
 
-       xor_blocks(src_cnt, len, _dest,
-               (void **) src_list);
+       while (src_cnt > 0) {
+               /* process up to 'MAX_XOR_BLOCKS' sources */
+               xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
+               xor_blocks(xor_src_cnt, len, dest_buf, &srcs[src_off]);
 
-       async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
+               /* drop completed sources */
+               src_cnt -= xor_src_cnt;
+               src_off += xor_src_cnt;
+       }
+
+       async_tx_sync_epilog(cb_fn, cb_param);
 }
 
 /**
@@ -132,106 +179,34 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
        struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
                                                      &dest, 1, src_list,
                                                      src_cnt, len);
-       struct dma_device *device = chan ? chan->device : NULL;
-       struct dma_async_tx_descriptor *tx = NULL;
-       dma_async_tx_callback _cb_fn;
-       void *_cb_param;
-       unsigned long local_flags;
-       int xor_src_cnt;
-       int i = 0, src_off = 0;
-
        BUG_ON(src_cnt <= 1);
 
-       while (src_cnt) {
-               local_flags = flags;
-               if (device) { /* run the xor asynchronously */
-                       xor_src_cnt = min(src_cnt, device->max_xor);
-                       /* if we are submitting additional xors
-                        * only set the callback on the last transaction
-                        */
-                       if (src_cnt > xor_src_cnt) {
-                               local_flags &= ~ASYNC_TX_ACK;
-                               _cb_fn = NULL;
-                               _cb_param = NULL;
-                       } else {
-                               _cb_fn = cb_fn;
-                               _cb_param = cb_param;
-                       }
-
-                       tx = do_async_xor(device, chan, dest,
-                                         &src_list[src_off], offset,
-                                         xor_src_cnt, len, local_flags,
-                                         depend_tx, _cb_fn, _cb_param);
-               } else { /* run the xor synchronously */
-                       /* in the sync case the dest is an implied source
-                        * (assumes the dest is at the src_off index)
-                        */
-                       if (flags & ASYNC_TX_XOR_DROP_DST) {
-                               src_cnt--;
-                               src_off++;
-                       }
-
-                       /* process up to 'MAX_XOR_BLOCKS' sources */
-                       xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
-
-                       /* if we are submitting additional xors
-                        * only set the callback on the last transaction
-                        */
-                       if (src_cnt > xor_src_cnt) {
-                               local_flags &= ~ASYNC_TX_ACK;
-                               _cb_fn = NULL;
-                               _cb_param = NULL;
-                       } else {
-                               _cb_fn = cb_fn;
-                               _cb_param = cb_param;
-                       }
-
-                       /* wait for any prerequisite operations */
-                       if (depend_tx) {
-                               /* if ack is already set then we cannot be sure
-                                * we are referring to the correct operation
-                                */
-                               BUG_ON(async_tx_test_ack(depend_tx));
-                               if (dma_wait_for_async_tx(depend_tx) ==
-                                       DMA_ERROR)
-                                       panic("%s: DMA_ERROR waiting for "
-                                               "depend_tx\n",
-                                               __func__);
-                       }
-
-                       do_sync_xor(dest, &src_list[src_off], offset,
-                               xor_src_cnt, len, local_flags, depend_tx,
-                               _cb_fn, _cb_param);
-               }
+       if (chan) {
+               /* run the xor asynchronously */
+               pr_debug("%s (async): len: %zu\n", __func__, len);
 
-               /* the previous tx is hidden from the client,
-                * so ack it
-                */
-               if (i && depend_tx)
-                       async_tx_ack(depend_tx);
+               return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
+                                   flags, depend_tx, cb_fn, cb_param);
+       } else {
+               /* run the xor synchronously */
+               pr_debug("%s (sync): len: %zu\n", __func__, len);
 
-               depend_tx = tx;
+               /* in the sync case the dest is an implied source
+                * (assumes the dest is the first source)
+                */
+               if (flags & ASYNC_TX_XOR_DROP_DST) {
+                       src_cnt--;
+                       src_list++;
+               }
 
-               if (src_cnt > xor_src_cnt) {
-                       /* drop completed sources */
-                       src_cnt -= xor_src_cnt;
-                       src_off += xor_src_cnt;
+               /* wait for any prerequisite operations */
+               async_tx_quiesce(&depend_tx);
 
-                       /* unconditionally preserve the destination */
-                       flags &= ~ASYNC_TX_XOR_ZERO_DST;
+               do_sync_xor(dest, src_list, offset, src_cnt, len,
+                           flags, cb_fn, cb_param);
 
-                       /* use the intermediate result a source, but remember
-                        * it's dropped, because it's implied, in the sync case
-                        */
-                       src_list[--src_off] = dest;
-                       src_cnt++;
-                       flags |= ASYNC_TX_XOR_DROP_DST;
-               } else
-                       src_cnt = 0;
-               i++;
+               return NULL;
        }
-
-       return tx;
 }
 EXPORT_SYMBOL_GPL(async_xor);
 
@@ -285,11 +260,11 @@ async_xor_zero_sum(struct page *dest, struct page **src_list,
                tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
                                                      len, result,
                                                      dma_prep_flags);
-               if (!tx) {
-                       if (depend_tx)
-                               dma_wait_for_async_tx(depend_tx);
+               if (unlikely(!tx)) {
+                       async_tx_quiesce(&depend_tx);
 
-                       while (!tx)
+                       while (!tx) {
+                               dma_async_issue_pending(chan);
                                tx = device->device_prep_dma_zero_sum(chan,
                                        dma_src, src_cnt, len, result,
                                        dma_prep_flags);
+                       }
@@ -307,18 +282,11 @@ async_xor_zero_sum(struct page *dest, struct page **src_list,
                tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
                        depend_tx, NULL, NULL);
 
-               if (tx) {
-                       if (dma_wait_for_async_tx(tx) == DMA_ERROR)
-                               panic("%s: DMA_ERROR waiting for tx\n",
-                                       __func__);
-                       async_tx_ack(tx);
-               }
+               async_tx_quiesce(&tx);
 
                *result = page_is_zero(dest, offset, len) ? 0 : 1;
 
-               tx = NULL;
-
-               async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
+               async_tx_sync_epilog(cb_fn, cb_param);
        }
 
        return tx;
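
After this rework, async_xor() transparently splits a request whose src_cnt exceeds the channel's max_xor into a chain of descriptors, keeping the destination buffer DMA-mapped across the chain via DMA_COMPL_SKIP_DEST_UNMAP and attaching the callback only to the final descriptor. Callers are unchanged; a hedged illustration follows (the raid_* names are invented, not from this merge):

#include <linux/async_tx.h>

static void raid_xor_done(void *ctx)
{
        /* called once, when the last descriptor of the (possibly chained)
         * xor completes; intermediate descriptors carry no callback */
}

static struct dma_async_tx_descriptor *
raid_xor_stripe(struct page *dest, struct page **srcs, int src_cnt, void *ctx)
{
        /* zero the destination, xor all sources into it, ack when done;
         * returns NULL if the operation was performed synchronously */
        return async_xor(dest, srcs, 0, src_cnt, PAGE_SIZE,
                         ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK,
                         NULL, raid_xor_done, ctx);
}
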
diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index bf5b92f86df7e07ca3813a4e127f21c7b92f9bff..ec249d2db633edb68d57ad22f8f27d42e2c8e5f4 100644
 #include <linux/device.h>
 #include <linux/dca.h>
 
-MODULE_LICENSE("GPL");
+#define DCA_VERSION "1.4"
 
-/* For now we're assuming a single, global, DCA provider for the system. */
+MODULE_VERSION(DCA_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");
 
 static DEFINE_SPINLOCK(dca_lock);
 
-static struct dca_provider *global_dca = NULL;
+static LIST_HEAD(dca_providers);
+
+static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+{
+       struct dca_provider *dca, *ret = NULL;
+
+       list_for_each_entry(dca, &dca_providers, node) {
+               if ((!dev) || (dca->ops->dev_managed(dca, dev))) {
+                       ret = dca;
+                       break;
+               }
+       }
+
+       return ret;
+}
 
 /**
  * dca_add_requester - add a dca client to the list
@@ -42,25 +58,39 @@ static struct dca_provider *global_dca = NULL;
  */
 int dca_add_requester(struct device *dev)
 {
-       int err, slot;
+       struct dca_provider *dca;
+       int err, slot = -ENODEV;
 
-       if (!global_dca)
-               return -ENODEV;
+       if (!dev)
+               return -EFAULT;
 
        spin_lock(&dca_lock);
-       slot = global_dca->ops->add_requester(global_dca, dev);
-       spin_unlock(&dca_lock);
-       if (slot < 0)
+
+       /* check if the requester has not been added already */
+       dca = dca_find_provider_by_dev(dev);
+       if (dca) {
+               spin_unlock(&dca_lock);
+               return -EEXIST;
+       }
+
+       list_for_each_entry(dca, &dca_providers, node) {
+               slot = dca->ops->add_requester(dca, dev);
+               if (slot >= 0)
+                       break;
+       }
+       if (slot < 0) {
+               spin_unlock(&dca_lock);
                return slot;
+       }
 
-       err = dca_sysfs_add_req(global_dca, dev, slot);
+       err = dca_sysfs_add_req(dca, dev, slot);
        if (err) {
-               spin_lock(&dca_lock);
-               global_dca->ops->remove_requester(global_dca, dev);
+               dca->ops->remove_requester(dca, dev);
                spin_unlock(&dca_lock);
                return err;
        }
 
+       spin_unlock(&dca_lock);
        return 0;
 }
 EXPORT_SYMBOL_GPL(dca_add_requester);
@@ -71,30 +101,78 @@ EXPORT_SYMBOL_GPL(dca_add_requester);
  */
 int dca_remove_requester(struct device *dev)
 {
+       struct dca_provider *dca;
        int slot;
-       if (!global_dca)
-               return -ENODEV;
+
+       if (!dev)
+               return -EFAULT;
 
        spin_lock(&dca_lock);
-       slot = global_dca->ops->remove_requester(global_dca, dev);
-       spin_unlock(&dca_lock);
-       if (slot < 0)
+       dca = dca_find_provider_by_dev(dev);
+       if (!dca) {
+               spin_unlock(&dca_lock);
+               return -ENODEV;
+       }
+       slot = dca->ops->remove_requester(dca, dev);
+       if (slot < 0) {
+               spin_unlock(&dca_lock);
                return slot;
+       }
 
-       dca_sysfs_remove_req(global_dca, slot);
+       dca_sysfs_remove_req(dca, slot);
+
+       spin_unlock(&dca_lock);
        return 0;
 }
 EXPORT_SYMBOL_GPL(dca_remove_requester);
 
 /**
- * dca_get_tag - return the dca tag for the given cpu
+ * dca_common_get_tag - return the dca tag (serves both new and old api)
+ * @dev - the device that wants dca service
  * @cpu - the cpuid as returned by get_cpu()
  */
-u8 dca_get_tag(int cpu)
+u8 dca_common_get_tag(struct device *dev, int cpu)
 {
-       if (!global_dca)
+       struct dca_provider *dca;
+       u8 tag;
+
+       spin_lock(&dca_lock);
+
+       dca = dca_find_provider_by_dev(dev);
+       if (!dca) {
+               spin_unlock(&dca_lock);
                return -ENODEV;
-       return global_dca->ops->get_tag(global_dca, cpu);
+       }
+       tag = dca->ops->get_tag(dca, dev, cpu);
+
+       spin_unlock(&dca_lock);
+       return tag;
+}
+
+/**
+ * dca3_get_tag - return the dca tag to the requester device
+ *                for the given cpu (new api)
+ * @dev - the device that wants dca service
+ * @cpu - the cpuid as returned by get_cpu()
+ */
+u8 dca3_get_tag(struct device *dev, int cpu)
+{
+       if (!dev)
+               return -EFAULT;
+
+       return dca_common_get_tag(dev, cpu);
+}
+EXPORT_SYMBOL_GPL(dca3_get_tag);
+
+/**
+ * dca_get_tag - return the dca tag for the given cpu (old api)
+ * @cpu - the cpuid as returned by get_cpu()
+ */
+u8 dca_get_tag(int cpu)
+{
+       struct device *dev = NULL;
+
+       return dca_common_get_tag(dev, cpu);
 }
 EXPORT_SYMBOL_GPL(dca_get_tag);
 
@@ -140,12 +218,10 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev)
 {
        int err;
 
-       if (global_dca)
-               return -EEXIST;
        err = dca_sysfs_add_provider(dca, dev);
        if (err)
                return err;
-       global_dca = dca;
+       list_add(&dca->node, &dca_providers);
        blocking_notifier_call_chain(&dca_provider_chain,
                                     DCA_PROVIDER_ADD, NULL);
        return 0;
@@ -158,11 +234,9 @@ EXPORT_SYMBOL_GPL(register_dca_provider);
  */
 void unregister_dca_provider(struct dca_provider *dca)
 {
-       if (!global_dca)
-               return;
        blocking_notifier_call_chain(&dca_provider_chain,
                                     DCA_PROVIDER_REMOVE, NULL);
-       global_dca = NULL;
+       list_del(&dca->node);
        dca_sysfs_remove_provider(dca);
 }
 EXPORT_SYMBOL_GPL(unregister_dca_provider);
@@ -187,6 +261,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify);
 
 static int __init dca_init(void)
 {
+       printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION);
        return dca_sysfs_init();
 }
 
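
With the single global_dca pointer replaced by a provider list, a requester is matched to whichever provider manages its device, and the new dca3_get_tag() returns a per-device tag. A hedged sketch of the requester side (example_enable_dca() is hypothetical; only the dca_* calls come from this file):

#include <linux/dca.h>
#include <linux/smp.h>

static void example_enable_dca(struct device *dev)
{
        int cpu;
        u8 tag;

        if (dca_add_requester(dev) < 0)
                return;                         /* no provider manages 'dev' */

        cpu = get_cpu();
        tag = dca3_get_tag(dev, cpu);           /* new per-device API */
        put_cpu();

        /* ... program 'tag' into the device's DCA registers ... */
        /* on teardown: dca_remove_requester(dev); */
}
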
diff --git a/drivers/dca/dca-sysfs.c b/drivers/dca/dca-sysfs.c
index 9a70377bfb34e808e954afb266eb5218f8bb7982..7af4b403bd2d12a6f3219f8f8d6615f5de69def1 100644
@@ -13,10 +13,11 @@ static spinlock_t dca_idr_lock;
 int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot)
 {
        struct device *cd;
+       static int req_count;
 
        cd = device_create_drvdata(dca_class, dca->cd,
                                   MKDEV(0, slot + 1), NULL,
-                                  "requester%d", slot);
+                                  "requester%d", req_count++);
        if (IS_ERR(cd))
                return PTR_ERR(cd);
        return 0;
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 6239c3df30ac53fd877e4779c721068568556e1d..cd303901eb5b20c13b1636ec32614d3ac8800a60 100644
@@ -4,13 +4,14 @@
 
 menuconfig DMADEVICES
        bool "DMA Engine support"
-       depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX || PPC
-       depends on !HIGHMEM64G
+       depends on !HIGHMEM64G && HAS_DMA
        help
          DMA engines can do asynchronous data transfers without
          involving the host CPU.  Currently, this framework can be
          used to offload memory copies in the network stack and
-         RAID operations in the MD driver.
+         RAID operations in the MD driver.  This menu only presents
+         DMA Device drivers supported by the configured arch; it may
+         be empty in some cases.
 
 if DMADEVICES
 
@@ -37,6 +38,15 @@ config INTEL_IOP_ADMA
        help
          Enable support for the Intel(R) IOP Series RAID engines.
 
+config DW_DMAC
+       tristate "Synopsys DesignWare AHB DMA support"
+       depends on AVR32
+       select DMA_ENGINE
+       default y if CPU_AT32AP7000
+       help
+         Support the Synopsys DesignWare AHB DMA controller.  This
+         can be integrated in chips such as the Atmel AT32ap7000.
+
 config FSL_DMA
        bool "Freescale MPC85xx/MPC83xx DMA support"
        depends on PPC
@@ -46,6 +56,14 @@ config FSL_DMA
          MPC8560/40, MPC8555, MPC8548 and MPC8641 processors.
          The MPC8349, MPC8360 is also supported.
 
+config MV_XOR
+       bool "Marvell XOR engine support"
+       depends on PLAT_ORION
+       select ASYNC_CORE
+       select DMA_ENGINE
+       ---help---
+         Enable support for the Marvell XOR engine.
+
 config DMA_ENGINE
        bool
 
@@ -55,10 +73,19 @@ comment "DMA Clients"
 config NET_DMA
        bool "Network: TCP receive copy offload"
        depends on DMA_ENGINE && NET
+       default (INTEL_IOATDMA || FSL_DMA)
        help
          This enables the use of DMA engines in the network stack to
          offload receive copy-to-user operations, freeing CPU cycles.
-         Since this is the main user of the DMA engine, it should be enabled;
-         say Y here.
+
+         Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise
+         say N.
+
+config DMATEST
+       tristate "DMA Test client"
+       depends on DMA_ENGINE
+       help
+         Simple DMA test client. Say N unless you're debugging a
+         DMA Device driver.
 
 endif
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index c8036d94590277d24f994a82c61fccbe51a89f07..14f59527d4f6bc5ab02e750e7580f209506cd7c1 100644
@@ -1,6 +1,9 @@
 obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
 obj-$(CONFIG_NET_DMA) += iovlock.o
+obj-$(CONFIG_DMATEST) += dmatest.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
 ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
 obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_MV_XOR) += mv_xor.o
+obj-$(CONFIG_DW_DMAC) += dw_dmac.o
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 97b329e767983172a7069687d4d8fedb1e28cee7..dc003a3a787d545207fed8be6e81bb4aacd970c1 100644
@@ -169,12 +169,18 @@ static void dma_client_chan_alloc(struct dma_client *client)
        enum dma_state_client ack;
 
        /* Find a channel */
-       list_for_each_entry(device, &dma_device_list, global_node)
+       list_for_each_entry(device, &dma_device_list, global_node) {
+               /* Does the client require a specific DMA controller? */
+               if (client->slave && client->slave->dma_dev
+                               && client->slave->dma_dev != device->dev)
+                       continue;
+
                list_for_each_entry(chan, &device->channels, device_node) {
                        if (!dma_chan_satisfies_mask(chan, client->cap_mask))
                                continue;
 
-                       desc = chan->device->device_alloc_chan_resources(chan);
+                       desc = chan->device->device_alloc_chan_resources(
+                                       chan, client);
                        if (desc >= 0) {
                                ack = client->event_callback(client,
                                                chan,
@@ -183,12 +189,14 @@ static void dma_client_chan_alloc(struct dma_client *client)
                                /* we are done once this client rejects
                                 * an available resource
                                 */
-                               if (ack == DMA_ACK)
+                               if (ack == DMA_ACK) {
                                        dma_chan_get(chan);
-                               else if (ack == DMA_NAK)
+                                       chan->client_count++;
+                               } else if (ack == DMA_NAK)
                                        return;
                        }
                }
+       }
 }
 
 enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
@@ -272,8 +280,10 @@ static void dma_clients_notify_removed(struct dma_chan *chan)
                /* client was holding resources for this channel so
                 * free it
                 */
-               if (ack == DMA_ACK)
+               if (ack == DMA_ACK) {
                        dma_chan_put(chan);
+                       chan->client_count--;
+               }
        }
 
        mutex_unlock(&dma_list_mutex);
@@ -285,6 +295,10 @@ static void dma_clients_notify_removed(struct dma_chan *chan)
  */
 void dma_async_client_register(struct dma_client *client)
 {
+       /* validate client data */
+       BUG_ON(dma_has_cap(DMA_SLAVE, client->cap_mask) &&
+               !client->slave);
+
        mutex_lock(&dma_list_mutex);
        list_add_tail(&client->global_node, &dma_client_list);
        mutex_unlock(&dma_list_mutex);
@@ -313,8 +327,10 @@ void dma_async_client_unregister(struct dma_client *client)
                        ack = client->event_callback(client, chan,
                                DMA_RESOURCE_REMOVED);
 
-                       if (ack == DMA_ACK)
+                       if (ack == DMA_ACK) {
                                dma_chan_put(chan);
+                               chan->client_count--;
+                       }
                }
 
        list_del(&client->global_node);
@@ -359,6 +375,10 @@ int dma_async_device_register(struct dma_device *device)
                !device->device_prep_dma_memset);
        BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
                !device->device_prep_dma_interrupt);
+       BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
+               !device->device_prep_slave_sg);
+       BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
+               !device->device_terminate_all);
 
        BUG_ON(!device->device_alloc_chan_resources);
        BUG_ON(!device->device_free_chan_resources);
@@ -378,7 +398,7 @@ int dma_async_device_register(struct dma_device *device)
 
                chan->chan_id = chancnt++;
                chan->dev.class = &dma_devclass;
-               chan->dev.parent = NULL;
+               chan->dev.parent = device->dev;
                snprintf(chan->dev.bus_id, BUS_ID_SIZE, "dma%dchan%d",
                         device->dev_id, chan->chan_id);
 
@@ -394,6 +414,7 @@ int dma_async_device_register(struct dma_device *device)
                kref_get(&device->refcount);
                kref_get(&device->refcount);
                kref_init(&chan->refcount);
+               chan->client_count = 0;
                chan->slow_ref = 0;
                INIT_RCU_HEAD(&chan->rcu);
        }
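
The dmaengine changes above add per-channel client counting, pass the requesting dma_client into device_alloc_chan_resources(), and require DMA_SLAVE clients to describe their peripheral via client->slave (optionally pinned to one controller through slave->dma_dev). A hedged sketch of a slave client registration; only ->slave, ->dma_dev, cap_mask and event_callback are taken from this merge, while everything named example_* (and any further dma_slave fields) is assumed:

#include <linux/dmaengine.h>

static enum dma_state_client
example_event(struct dma_client *client, struct dma_chan *chan,
              enum dma_state state)
{
        switch (state) {
        case DMA_RESOURCE_AVAILABLE:
                return DMA_ACK;         /* take the offered channel */
        case DMA_RESOURCE_REMOVED:
                return DMA_ACK;         /* release it again */
        default:
                return DMA_DUP;
        }
}

static struct dma_slave example_slave;  /* peripheral description (details assumed) */
static struct dma_client example_client = {
        .event_callback = example_event,
        .slave          = &example_slave,       /* mandatory once DMA_SLAVE is set */
};

static void example_request_slave_channel(struct device *dmac_dev)
{
        example_slave.dma_dev = dmac_dev;       /* only accept this controller */

        dma_cap_set(DMA_SLAVE, example_client.cap_mask);
        dma_async_client_register(&example_client);
        dma_async_client_chan_request(&example_client);
}
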
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
new file mode 100644
index 0000000..a08d197
--- /dev/null
@@ -0,0 +1,444 @@
+/*
+ * DMA Engine test module
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/wait.h>
+
+static unsigned int test_buf_size = 16384;
+module_param(test_buf_size, uint, S_IRUGO);
+MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");
+
+static char test_channel[BUS_ID_SIZE];
+module_param_string(channel, test_channel, sizeof(test_channel), S_IRUGO);
+MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)");
+
+static char test_device[BUS_ID_SIZE];
+module_param_string(device, test_device, sizeof(test_device), S_IRUGO);
+MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)");
+
+static unsigned int threads_per_chan = 1;
+module_param(threads_per_chan, uint, S_IRUGO);
+MODULE_PARM_DESC(threads_per_chan,
+               "Number of threads to start per channel (default: 1)");
+
+static unsigned int max_channels;
+module_param(max_channels, uint, S_IRUGO);
+MODULE_PARM_DESC(max_channels,
+               "Maximum number of channels to use (default: all)");
+
+/*
+ * Initialization patterns. All bytes in the source buffer have bit 7
+ * set, all bytes in the destination buffer have bit 7 cleared.
+ *
+ * Bit 6 is set for all bytes which are to be copied by the DMA
+ * engine. Bit 5 is set for all bytes which are to be overwritten by
+ * the DMA engine.
+ *
+ * The remaining bits are the inverse of a counter which increments by
+ * one for each byte address.
+ */
+#define PATTERN_SRC            0x80
+#define PATTERN_DST            0x00
+#define PATTERN_COPY           0x40
+#define PATTERN_OVERWRITE      0x20
+#define PATTERN_COUNT_MASK     0x1f
+
+struct dmatest_thread {
+       struct list_head        node;
+       struct task_struct      *task;
+       struct dma_chan         *chan;
+       u8                      *srcbuf;
+       u8                      *dstbuf;
+};
+
+struct dmatest_chan {
+       struct list_head        node;
+       struct dma_chan         *chan;
+       struct list_head        threads;
+};
+
+/*
+ * These are protected by dma_list_mutex since they're only used by
+ * the DMA client event callback
+ */
+static LIST_HEAD(dmatest_channels);
+static unsigned int nr_channels;
+
+static bool dmatest_match_channel(struct dma_chan *chan)
+{
+       if (test_channel[0] == '\0')
+               return true;
+       return strcmp(chan->dev.bus_id, test_channel) == 0;
+}
+
+static bool dmatest_match_device(struct dma_device *device)
+{
+       if (test_device[0] == '\0')
+               return true;
+       return strcmp(device->dev->bus_id, test_device) == 0;
+}
+
+static unsigned long dmatest_random(void)
+{
+       unsigned long buf;
+
+       get_random_bytes(&buf, sizeof(buf));
+       return buf;
+}
+
+static void dmatest_init_srcbuf(u8 *buf, unsigned int start, unsigned int len)
+{
+       unsigned int i;
+
+       for (i = 0; i < start; i++)
+               buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+       for ( ; i < start + len; i++)
+               buf[i] = PATTERN_SRC | PATTERN_COPY
+                       | (~i & PATTERN_COUNT_MASK);
+       for ( ; i < test_buf_size; i++)
+               buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+}
+
+static void dmatest_init_dstbuf(u8 *buf, unsigned int start, unsigned int len)
+{
+       unsigned int i;
+
+       for (i = 0; i < start; i++)
+               buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+       for ( ; i < start + len; i++)
+               buf[i] = PATTERN_DST | PATTERN_OVERWRITE
+                       | (~i & PATTERN_COUNT_MASK);
+       for ( ; i < test_buf_size; i++)
+               buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+}
+
+static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
+               unsigned int counter, bool is_srcbuf)
+{
+       u8              diff = actual ^ pattern;
+       u8              expected = pattern | (~counter & PATTERN_COUNT_MASK);
+       const char      *thread_name = current->comm;
+
+       if (is_srcbuf)
+               pr_warning("%s: srcbuf[0x%x] overwritten!"
+                               " Expected %02x, got %02x\n",
+                               thread_name, index, expected, actual);
+       else if ((pattern & PATTERN_COPY)
+                       && (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
+               pr_warning("%s: dstbuf[0x%x] not copied!"
+                               " Expected %02x, got %02x\n",
+                               thread_name, index, expected, actual);
+       else if (diff & PATTERN_SRC)
+               pr_warning("%s: dstbuf[0x%x] was copied!"
+                               " Expected %02x, got %02x\n",
+                               thread_name, index, expected, actual);
+       else
+               pr_warning("%s: dstbuf[0x%x] mismatch!"
+                               " Expected %02x, got %02x\n",
+                               thread_name, index, expected, actual);
+}
+
+static unsigned int dmatest_verify(u8 *buf, unsigned int start,
+               unsigned int end, unsigned int counter, u8 pattern,
+               bool is_srcbuf)
+{
+       unsigned int i;
+       unsigned int error_count = 0;
+       u8 actual;
+
+       for (i = start; i < end; i++) {
+               actual = buf[i];
+               if (actual != (pattern | (~counter & PATTERN_COUNT_MASK))) {
+                       if (error_count < 32)
+                               dmatest_mismatch(actual, pattern, i, counter,
+                                               is_srcbuf);
+                       error_count++;
+               }
+               counter++;
+       }
+
+       if (error_count > 32)
+               pr_warning("%s: %u errors suppressed\n",
+                       current->comm, error_count - 32);
+
+       return error_count;
+}
+
+/*
+ * This function repeatedly tests DMA transfers of various lengths and
+ * offsets until it is told to exit by kthread_stop(). There may be
+ * multiple threads running this function in parallel for a single
+ * channel, and there may be multiple channels being tested in
+ * parallel.
+ *
+ * Before each test, the source and destination buffer is initialized
+ * with a known pattern. This pattern is different depending on
+ * whether it's in an area which is supposed to be copied or
+ * overwritten, and different in the source and destination buffers.
+ * So if the DMA engine doesn't copy exactly what we tell it to copy,
+ * we'll notice.
+ */
+static int dmatest_func(void *data)
+{
+       struct dmatest_thread   *thread = data;
+       struct dma_chan         *chan;
+       const char              *thread_name;
+       unsigned int            src_off, dst_off, len;
+       unsigned int            error_count;
+       unsigned int            failed_tests = 0;
+       unsigned int            total_tests = 0;
+       dma_cookie_t            cookie;
+       enum dma_status         status;
+       int                     ret;
+
+       thread_name = current->comm;
+
+       ret = -ENOMEM;
+       thread->srcbuf = kmalloc(test_buf_size, GFP_KERNEL);
+       if (!thread->srcbuf)
+               goto err_srcbuf;
+       thread->dstbuf = kmalloc(test_buf_size, GFP_KERNEL);
+       if (!thread->dstbuf)
+               goto err_dstbuf;
+
+       smp_rmb();
+       chan = thread->chan;
+       dma_chan_get(chan);
+
+       while (!kthread_should_stop()) {
+               total_tests++;
+
+               len = dmatest_random() % test_buf_size + 1;
+               src_off = dmatest_random() % (test_buf_size - len + 1);
+               dst_off = dmatest_random() % (test_buf_size - len + 1);
+
+               dmatest_init_srcbuf(thread->srcbuf, src_off, len);
+               dmatest_init_dstbuf(thread->dstbuf, dst_off, len);
+
+               cookie = dma_async_memcpy_buf_to_buf(chan,
+                               thread->dstbuf + dst_off,
+                               thread->srcbuf + src_off,
+                               len);
+               if (dma_submit_error(cookie)) {
+                       pr_warning("%s: #%u: submit error %d with src_off=0x%x "
+                                       "dst_off=0x%x len=0x%x\n",
+                                       thread_name, total_tests - 1, cookie,
+                                       src_off, dst_off, len);
+                       msleep(100);
+                       failed_tests++;
+                       continue;
+               }
+               dma_async_memcpy_issue_pending(chan);
+
+               do {
+                       msleep(1);
+                       status = dma_async_memcpy_complete(
+                                       chan, cookie, NULL, NULL);
+               } while (status == DMA_IN_PROGRESS);
+
+               if (status == DMA_ERROR) {
+                       pr_warning("%s: #%u: error during copy\n",
+                                       thread_name, total_tests - 1);
+                       failed_tests++;
+                       continue;
+               }
+
+               error_count = 0;
+
+               pr_debug("%s: verifying source buffer...\n", thread_name);
+               error_count += dmatest_verify(thread->srcbuf, 0, src_off,
+                               0, PATTERN_SRC, true);
+               error_count += dmatest_verify(thread->srcbuf, src_off,
+                               src_off + len, src_off,
+                               PATTERN_SRC | PATTERN_COPY, true);
+               error_count += dmatest_verify(thread->srcbuf, src_off + len,
+                               test_buf_size, src_off + len,
+                               PATTERN_SRC, true);
+
+               pr_debug("%s: verifying dest buffer...\n",
+                               thread->task->comm);
+               error_count += dmatest_verify(thread->dstbuf, 0, dst_off,
+                               0, PATTERN_DST, false);
+               error_count += dmatest_verify(thread->dstbuf, dst_off,
+                               dst_off + len, src_off,
+                               PATTERN_SRC | PATTERN_COPY, false);
+               error_count += dmatest_verify(thread->dstbuf, dst_off + len,
+                               test_buf_size, dst_off + len,
+                               PATTERN_DST, false);
+
+               if (error_count) {
+                       pr_warning("%s: #%u: %u errors with "
+                               "src_off=0x%x dst_off=0x%x len=0x%x\n",
+                               thread_name, total_tests - 1, error_count,
+                               src_off, dst_off, len);
+                       failed_tests++;
+               } else {
+                       pr_debug("%s: #%u: No errors with "
+                               "src_off=0x%x dst_off=0x%x len=0x%x\n",
+                               thread_name, total_tests - 1,
+                               src_off, dst_off, len);
+               }
+       }
+
+       ret = 0;
+       dma_chan_put(chan);
+       kfree(thread->dstbuf);
+err_dstbuf:
+       kfree(thread->srcbuf);
+err_srcbuf:
+       pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
+                       thread_name, total_tests, failed_tests, ret);
+       return ret;
+}
+
+static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
+{
+       struct dmatest_thread   *thread;
+       struct dmatest_thread   *_thread;
+       int                     ret;
+
+       list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
+               ret = kthread_stop(thread->task);
+               pr_debug("dmatest: thread %s exited with status %d\n",
+                               thread->task->comm, ret);
+               list_del(&thread->node);
+               kfree(thread);
+       }
+       kfree(dtc);
+}
+
+static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
+{
+       struct dmatest_chan     *dtc;
+       struct dmatest_thread   *thread;
+       unsigned int            i;
+
+       dtc = kmalloc(sizeof(struct dmatest_chan), GFP_ATOMIC);
+       if (!dtc) {
+               pr_warning("dmatest: No memory for %s\n", chan->dev.bus_id);
+               return DMA_NAK;
+       }
+
+       dtc->chan = chan;
+       INIT_LIST_HEAD(&dtc->threads);
+
+       for (i = 0; i < threads_per_chan; i++) {
+               thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
+               if (!thread) {
+                       pr_warning("dmatest: No memory for %s-test%u\n",
+                                       chan->dev.bus_id, i);
+                       break;
+               }
+               thread->chan = dtc->chan;
+               smp_wmb();
+               thread->task = kthread_run(dmatest_func, thread, "%s-test%u",
+                               chan->dev.bus_id, i);
+               if (IS_ERR(thread->task)) {
+                       pr_warning("dmatest: Failed to run thread %s-test%u\n",
+                                       chan->dev.bus_id, i);
+                       kfree(thread);
+                       break;
+               }
+
+               /* srcbuf and dstbuf are allocated by the thread itself */
+
+               list_add_tail(&thread->node, &dtc->threads);
+       }
+
+       pr_info("dmatest: Started %u threads using %s\n", i, chan->dev.bus_id);
+
+       list_add_tail(&dtc->node, &dmatest_channels);
+       nr_channels++;
+
+       return DMA_ACK;
+}
+
+static enum dma_state_client dmatest_remove_channel(struct dma_chan *chan)
+{
+       struct dmatest_chan     *dtc, *_dtc;
+
+       list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
+               if (dtc->chan == chan) {
+                       list_del(&dtc->node);
+                       dmatest_cleanup_channel(dtc);
+                       pr_debug("dmatest: lost channel %s\n",
+                                       chan->dev.bus_id);
+                       return DMA_ACK;
+               }
+       }
+
+       return DMA_DUP;
+}
+
+/*
+ * Start testing threads as new channels are assigned to us, and kill
+ * them when the channels go away.
+ *
+ * When we unregister the client, all channels are removed so this
+ * will also take care of cleaning things up when the module is
+ * unloaded.
+ */
+static enum dma_state_client
+dmatest_event(struct dma_client *client, struct dma_chan *chan,
+               enum dma_state state)
+{
+       enum dma_state_client   ack = DMA_NAK;
+
+       switch (state) {
+       case DMA_RESOURCE_AVAILABLE:
+               if (!dmatest_match_channel(chan)
+                               || !dmatest_match_device(chan->device))
+                       ack = DMA_DUP;
+               else if (max_channels && nr_channels >= max_channels)
+                       ack = DMA_NAK;
+               else
+                       ack = dmatest_add_channel(chan);
+               break;
+
+       case DMA_RESOURCE_REMOVED:
+               ack = dmatest_remove_channel(chan);
+               break;
+
+       default:
+               pr_info("dmatest: Unhandled event %u (%s)\n",
+                               state, chan->dev.bus_id);
+               break;
+       }
+
+       return ack;
+}
+
+static struct dma_client dmatest_client = {
+       .event_callback = dmatest_event,
+};
+
+static int __init dmatest_init(void)
+{
+       dma_cap_set(DMA_MEMCPY, dmatest_client.cap_mask);
+       dma_async_client_register(&dmatest_client);
+       dma_async_client_chan_request(&dmatest_client);
+
+       return 0;
+}
+module_init(dmatest_init);
+
+static void __exit dmatest_exit(void)
+{
+       dma_async_client_unregister(&dmatest_client);
+}
+module_exit(dmatest_exit);
+
+MODULE_AUTHOR("Haavard Skinnemoen <hskinnemoen@atmel.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
new file mode 100644
index 0000000..94df917
--- /dev/null
@@ -0,0 +1,1122 @@
+/*
+ * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on
+ * AVR32 systems.)
+ *
+ * Copyright (C) 2007-2008 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dw_dmac_regs.h"
+
+/*
+ * This supports the Synopsys "DesignWare AHB Central DMA Controller",
+ * (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all
+ * of which use ARM any more).  See the "Databook" from Synopsys for
+ * information beyond what licensees probably provide.
+ *
+ * The driver has currently been tested only with the Atmel AT32AP7000,
+ * which does not support descriptor writeback.
+ */
+
+/* NOTE:  DMS+SMS is system-specific. We should get this information
+ * from the platform code somehow.
+ */
+#define DWC_DEFAULT_CTLLO      (DWC_CTLL_DST_MSIZE(0)          \
+                               | DWC_CTLL_SRC_MSIZE(0)         \
+                               | DWC_CTLL_DMS(0)               \
+                               | DWC_CTLL_SMS(1)               \
+                               | DWC_CTLL_LLP_D_EN             \
+                               | DWC_CTLL_LLP_S_EN)
+
+/*
+ * This is configuration-dependent and usually a funny size like 4095.
+ * Let's round it down to the nearest power of two.
+ *
+ * Note that this is a transfer count, i.e. if we transfer 32-bit
+ * words, we can do 8192 bytes per descriptor.
+ *
+ * This parameter is also system-specific.
+ */
+#define DWC_MAX_COUNT  2048U
+
+/*
+ * Number of descriptors to allocate for each channel. This should be
+ * made configurable somehow; preferably, the clients (at least the
+ * ones using slave transfers) should be able to give us a hint.
+ */
+#define NR_DESCS_PER_CHANNEL   64
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Because we're not relying on writeback from the controller (it may not
+ * even be configured into the core!) we don't need to use dma_pool.  These
+ * descriptors -- and associated data -- are cacheable.  We do need to make
+ * sure their dcache entries are written back before handing them off to
+ * the controller, though.
+ */
+
+static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
+{
+       return list_entry(dwc->active_list.next, struct dw_desc, desc_node);
+}
+
+static struct dw_desc *dwc_first_queued(struct dw_dma_chan *dwc)
+{
+       return list_entry(dwc->queue.next, struct dw_desc, desc_node);
+}
+
+static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
+{
+       struct dw_desc *desc, *_desc;
+       struct dw_desc *ret = NULL;
+       unsigned int i = 0;
+
+       spin_lock_bh(&dwc->lock);
+       list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
+               if (async_tx_test_ack(&desc->txd)) {
+                       list_del(&desc->desc_node);
+                       ret = desc;
+                       break;
+               }
+               dev_dbg(&dwc->chan.dev, "desc %p not ACKed\n", desc);
+               i++;
+       }
+       spin_unlock_bh(&dwc->lock);
+
+       dev_vdbg(&dwc->chan.dev, "scanned %u descriptors on freelist\n", i);
+
+       return ret;
+}
+
+static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+       struct dw_desc  *child;
+
+       list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               dma_sync_single_for_cpu(dwc->chan.dev.parent,
+                               child->txd.phys, sizeof(child->lli),
+                               DMA_TO_DEVICE);
+       dma_sync_single_for_cpu(dwc->chan.dev.parent,
+                       desc->txd.phys, sizeof(desc->lli),
+                       DMA_TO_DEVICE);
+}
+
+/*
+ * Move a descriptor, including any children, to the free list.
+ * `desc' must not be on any lists.
+ */
+static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+       if (desc) {
+               struct dw_desc *child;
+
+               dwc_sync_desc_for_cpu(dwc, desc);
+
+               spin_lock_bh(&dwc->lock);
+               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+                       dev_vdbg(&dwc->chan.dev,
+                                       "moving child desc %p to freelist\n",
+                                       child);
+               list_splice_init(&desc->txd.tx_list, &dwc->free_list);
+               dev_vdbg(&dwc->chan.dev, "moving desc %p to freelist\n", desc);
+               list_add(&desc->desc_node, &dwc->free_list);
+               spin_unlock_bh(&dwc->lock);
+       }
+}
+
+/* Called with dwc->lock held and bh disabled */
+static dma_cookie_t
+dwc_assign_cookie(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+       dma_cookie_t cookie = dwc->chan.cookie;
+
+       if (++cookie < 0)
+               cookie = 1;
+
+       dwc->chan.cookie = cookie;
+       desc->txd.cookie = cookie;
+
+       return cookie;
+}
+
+/*----------------------------------------------------------------------*/
+
+/* Called with dwc->lock held and bh disabled */
+static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
+{
+       struct dw_dma   *dw = to_dw_dma(dwc->chan.device);
+
+       /* ASSERT:  channel is idle */
+       if (dma_readl(dw, CH_EN) & dwc->mask) {
+               dev_err(&dwc->chan.dev,
+                       "BUG: Attempted to start non-idle channel\n");
+               dev_err(&dwc->chan.dev,
+                       "  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+                       channel_readl(dwc, SAR),
+                       channel_readl(dwc, DAR),
+                       channel_readl(dwc, LLP),
+                       channel_readl(dwc, CTL_HI),
+                       channel_readl(dwc, CTL_LO));
+
+               /* The tasklet will hopefully advance the queue... */
+               return;
+       }
+
+       channel_writel(dwc, LLP, first->txd.phys);
+       channel_writel(dwc, CTL_LO,
+                       DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+       channel_writel(dwc, CTL_HI, 0);
+       channel_set_bit(dw, CH_EN, dwc->mask);
+}
+
+/*----------------------------------------------------------------------*/
+
+static void
+dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+       dma_async_tx_callback           callback;
+       void                            *param;
+       struct dma_async_tx_descriptor  *txd = &desc->txd;
+
+       dev_vdbg(&dwc->chan.dev, "descriptor %u complete\n", txd->cookie);
+
+       dwc->completed = txd->cookie;
+       callback = txd->callback;
+       param = txd->callback_param;
+
+       dwc_sync_desc_for_cpu(dwc, desc);
+       list_splice_init(&txd->tx_list, &dwc->free_list);
+       list_move(&desc->desc_node, &dwc->free_list);
+
+       /*
+        * We use dma_unmap_page() regardless of how the buffers were
+        * mapped before they were submitted...
+        */
+       if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP))
+               dma_unmap_page(dwc->chan.dev.parent, desc->lli.dar, desc->len,
+                               DMA_FROM_DEVICE);
+       if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
+               dma_unmap_page(dwc->chan.dev.parent, desc->lli.sar, desc->len,
+                               DMA_TO_DEVICE);
+
+       /*
+        * The API requires that no submissions are done from a
+        * callback, so we don't need to drop the lock here
+        */
+       if (callback)
+               callback(param);
+}
+
+static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+       struct dw_desc *desc, *_desc;
+       LIST_HEAD(list);
+
+       if (dma_readl(dw, CH_EN) & dwc->mask) {
+               dev_err(&dwc->chan.dev,
+                       "BUG: XFER bit set, but channel not idle!\n");
+
+               /* Try to continue after resetting the channel... */
+               channel_clear_bit(dw, CH_EN, dwc->mask);
+               while (dma_readl(dw, CH_EN) & dwc->mask)
+                       cpu_relax();
+       }
+
+       /*
+        * Submit queued descriptors ASAP, i.e. before we go through
+        * the completed ones.
+        */
+       if (!list_empty(&dwc->queue))
+               dwc_dostart(dwc, dwc_first_queued(dwc));
+       list_splice_init(&dwc->active_list, &list);
+       list_splice_init(&dwc->queue, &dwc->active_list);
+
+       list_for_each_entry_safe(desc, _desc, &list, desc_node)
+               dwc_descriptor_complete(dwc, desc);
+}
+
+static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+       dma_addr_t llp;
+       struct dw_desc *desc, *_desc;
+       struct dw_desc *child;
+       u32 status_xfer;
+
+       /*
+        * Clear block interrupt flag before scanning so that we don't
+        * miss any, and read LLP before RAW_XFER to ensure it is
+        * valid if we decide to scan the list.
+        */
+       dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+       llp = channel_readl(dwc, LLP);
+       status_xfer = dma_readl(dw, RAW.XFER);
+
+       if (status_xfer & dwc->mask) {
+               /* Everything we've submitted is done */
+               dma_writel(dw, CLEAR.XFER, dwc->mask);
+               dwc_complete_all(dw, dwc);
+               return;
+       }
+
+       dev_vdbg(&dwc->chan.dev, "scan_descriptors: llp=0x%x\n", llp);
+
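+       /*
+        * LLP holds the address of the LLI the controller will fetch
+        * next, so the descriptor whose lli.llp matches it is the one
+        * currently in flight; everything ahead of it on active_list
+        * must already be done.
+        */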
+       list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
+               if (desc->lli.llp == llp)
+                       /* This one is currently in progress */
+                       return;
+
+               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+                       if (child->lli.llp == llp)
+                               /* Currently in progress */
+                               return;
+
+               /*
+                * No descriptors so far seem to be in progress, i.e.
+                * this one must be done.
+                */
+               dwc_descriptor_complete(dwc, desc);
+       }
+
+       dev_err(&dwc->chan.dev,
+               "BUG: All descriptors done, but channel not idle!\n");
+
+       /* Try to continue after resetting the channel... */
+       channel_clear_bit(dw, CH_EN, dwc->mask);
+       while (dma_readl(dw, CH_EN) & dwc->mask)
+               cpu_relax();
+
+       if (!list_empty(&dwc->queue)) {
+               dwc_dostart(dwc, dwc_first_queued(dwc));
+               list_splice_init(&dwc->queue, &dwc->active_list);
+       }
+}
+
+static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
+{
+       dev_printk(KERN_CRIT, &dwc->chan.dev,
+                       "  desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
+                       lli->sar, lli->dar, lli->llp,
+                       lli->ctlhi, lli->ctllo);
+}
+
+static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+       struct dw_desc *bad_desc;
+       struct dw_desc *child;
+
+       dwc_scan_descriptors(dw, dwc);
+
+       /*
+        * The descriptor currently at the head of the active list is
+        * borked. Since we don't have any way to report errors, we'll
+        * just have to scream loudly and try to carry on.
+        */
+       bad_desc = dwc_first_active(dwc);
+       list_del_init(&bad_desc->desc_node);
+       list_splice_init(&dwc->queue, dwc->active_list.prev);
+
+       /* Clear the error flag and try to restart the controller */
+       dma_writel(dw, CLEAR.ERROR, dwc->mask);
+       if (!list_empty(&dwc->active_list))
+               dwc_dostart(dwc, dwc_first_active(dwc));
+
+       /*
+        * KERN_CRITICAL may seem harsh, but since this only happens
+        * when someone submits a bad physical address in a
+        * descriptor, we should consider ourselves lucky that the
+        * controller flagged an error instead of scribbling over
+        * random memory locations.
+        */
+       dev_printk(KERN_CRIT, &dwc->chan.dev,
+                       "Bad descriptor submitted for DMA!\n");
+       dev_printk(KERN_CRIT, &dwc->chan.dev,
+                       "  cookie: %d\n", bad_desc->txd.cookie);
+       dwc_dump_lli(dwc, &bad_desc->lli);
+       list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+               dwc_dump_lli(dwc, &child->lli);
+
+       /* Pretend the descriptor completed successfully */
+       dwc_descriptor_complete(dwc, bad_desc);
+}
+
+static void dw_dma_tasklet(unsigned long data)
+{
+       struct dw_dma *dw = (struct dw_dma *)data;
+       struct dw_dma_chan *dwc;
+       u32 status_block;
+       u32 status_xfer;
+       u32 status_err;
+       int i;
+
+       status_block = dma_readl(dw, RAW.BLOCK);
+       status_xfer = dma_readl(dw, RAW.XFER);
+       status_err = dma_readl(dw, RAW.ERROR);
+
+       dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n",
+                       status_block, status_err);
+
+       for (i = 0; i < dw->dma.chancnt; i++) {
+               dwc = &dw->chan[i];
+               spin_lock(&dwc->lock);
+               if (status_err & (1 << i))
+                       dwc_handle_error(dw, dwc);
+               else if ((status_block | status_xfer) & (1 << i))
+                       dwc_scan_descriptors(dw, dwc);
+               spin_unlock(&dwc->lock);
+       }
+
+       /*
+        * Re-enable interrupts. Block Complete interrupts are only
+        * enabled if the INT_EN bit in the descriptor is set. This
+        * will trigger a scan before the whole list is done.
+        */
+       channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
+       channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+       channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
+}
+
+static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
+{
+       struct dw_dma *dw = dev_id;
+       u32 status;
+
+       dev_vdbg(dw->dma.dev, "interrupt: status=0x%x\n",
+                       dma_readl(dw, STATUS_INT));
+
+       /*
+        * Just disable the interrupts. We'll turn them back on in the
+        * softirq handler.
+        */
+       channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+       channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+       channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+       status = dma_readl(dw, STATUS_INT);
+       if (status) {
+               dev_err(dw->dma.dev,
+                       "BUG: Unexpected interrupts pending: 0x%x\n",
+                       status);
+
+               /* Try to recover */
+               channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1);
+               channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1);
+               channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1);
+               channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1);
+               channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1);
+       }
+
+       tasklet_schedule(&dw->tasklet);
+
+       return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+       struct dw_desc          *desc = txd_to_dw_desc(tx);
+       struct dw_dma_chan      *dwc = to_dw_dma_chan(tx->chan);
+       dma_cookie_t            cookie;
+
+       spin_lock_bh(&dwc->lock);
+       cookie = dwc_assign_cookie(dwc, desc);
+
+       /*
+        * REVISIT: We should attempt to chain as many descriptors as
+        * possible, perhaps even appending to those already submitted
+        * for DMA. But this is hard to do in a race-free manner.
+        */
+       if (list_empty(&dwc->active_list)) {
+               dev_vdbg(&tx->chan->dev, "tx_submit: started %u\n",
+                               desc->txd.cookie);
+               dwc_dostart(dwc, desc);
+               list_add_tail(&desc->desc_node, &dwc->active_list);
+       } else {
+               dev_vdbg(&tx->chan->dev, "tx_submit: queued %u\n",
+                               desc->txd.cookie);
+
+               list_add_tail(&desc->desc_node, &dwc->queue);
+       }
+
+       spin_unlock_bh(&dwc->lock);
+
+       return cookie;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+               size_t len, unsigned long flags)
+{
+       struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
+       struct dw_desc          *desc;
+       struct dw_desc          *first;
+       struct dw_desc          *prev;
+       size_t                  xfer_count;
+       size_t                  offset;
+       unsigned int            src_width;
+       unsigned int            dst_width;
+       u32                     ctllo;
+
+       dev_vdbg(&chan->dev, "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n",
+                       dest, src, len, flags);
+
+       if (unlikely(!len)) {
+               dev_dbg(&chan->dev, "prep_dma_memcpy: length is zero!\n");
+               return NULL;
+       }
+
+       /*
+        * We can be a lot more clever here, but this should take care
+        * of the most common optimization.
+        */
+       if (!((src | dest | len) & 3))
+               src_width = dst_width = 2;
+       else if (!((src | dest | len) & 1))
+               src_width = dst_width = 1;
+       else
+               src_width = dst_width = 0;
+
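+       /*
+        * A width of n selects 2^n-byte elements (0 = byte, 1 = halfword,
+        * 2 = word); ctlhi carries the element count, which is why the
+        * loop below advances by xfer_count << src_width bytes.
+        */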
+       ctllo = DWC_DEFAULT_CTLLO
+                       | DWC_CTLL_DST_WIDTH(dst_width)
+                       | DWC_CTLL_SRC_WIDTH(src_width)
+                       | DWC_CTLL_DST_INC
+                       | DWC_CTLL_SRC_INC
+                       | DWC_CTLL_FC_M2M;
+       prev = first = NULL;
+
+       for (offset = 0; offset < len; offset += xfer_count << src_width) {
+               xfer_count = min_t(size_t, (len - offset) >> src_width,
+                               DWC_MAX_COUNT);
+
+               desc = dwc_desc_get(dwc);
+               if (!desc)
+                       goto err_desc_get;
+
+               desc->lli.sar = src + offset;
+               desc->lli.dar = dest + offset;
+               desc->lli.ctllo = ctllo;
+               desc->lli.ctlhi = xfer_count;
+
+               if (!first) {
+                       first = desc;
+               } else {
+                       prev->lli.llp = desc->txd.phys;
+                       dma_sync_single_for_device(chan->dev.parent,
+                                       prev->txd.phys, sizeof(prev->lli),
+                                       DMA_TO_DEVICE);
+                       list_add_tail(&desc->desc_node,
+                                       &first->txd.tx_list);
+               }
+               prev = desc;
+       }
+
+       if (flags & DMA_PREP_INTERRUPT)
+               /* Trigger interrupt after last block */
+               prev->lli.ctllo |= DWC_CTLL_INT_EN;
+
+       prev->lli.llp = 0;
+       dma_sync_single_for_device(chan->dev.parent,
+                       prev->txd.phys, sizeof(prev->lli),
+                       DMA_TO_DEVICE);
+
+       first->txd.flags = flags;
+       first->len = len;
+
+       return &first->txd;
+
+err_desc_get:
+       dwc_desc_put(dwc, first);
+       return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+               unsigned int sg_len, enum dma_data_direction direction,
+               unsigned long flags)
+{
+       struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
+       struct dw_dma_slave     *dws = dwc->dws;
+       struct dw_desc          *prev;
+       struct dw_desc          *first;
+       u32                     ctllo;
+       dma_addr_t              reg;
+       unsigned int            reg_width;
+       unsigned int            mem_width;
+       unsigned int            i;
+       struct scatterlist      *sg;
+       size_t                  total_len = 0;
+
+       dev_vdbg(&chan->dev, "prep_dma_slave\n");
+
+       if (unlikely(!dws || !sg_len))
+               return NULL;
+
+       reg_width = dws->slave.reg_width;
+       prev = first = NULL;
+
+       sg_len = dma_map_sg(chan->dev.parent, sgl, sg_len, direction);
+
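+       /*
+        * The block transfer size in ctlhi is counted in source-width
+        * units, hence len >> mem_width for mem-to-periph and
+        * len >> reg_width for periph-to-mem below.
+        */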
+       switch (direction) {
+       case DMA_TO_DEVICE:
+               ctllo = (DWC_DEFAULT_CTLLO
+                               | DWC_CTLL_DST_WIDTH(reg_width)
+                               | DWC_CTLL_DST_FIX
+                               | DWC_CTLL_SRC_INC
+                               | DWC_CTLL_FC_M2P);
+               reg = dws->slave.tx_reg;
+               for_each_sg(sgl, sg, sg_len, i) {
+                       struct dw_desc  *desc;
+                       u32             len;
+                       u32             mem;
+
+                       desc = dwc_desc_get(dwc);
+                       if (!desc) {
+                               dev_err(&chan->dev,
+                                       "not enough descriptors available\n");
+                               goto err_desc_get;
+                       }
+
+                       mem = sg_phys(sg);
+                       len = sg_dma_len(sg);
+                       mem_width = 2;
+                       if (unlikely(mem & 3 || len & 3))
+                               mem_width = 0;
+
+                       desc->lli.sar = mem;
+                       desc->lli.dar = reg;
+                       desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
+                       desc->lli.ctlhi = len >> mem_width;
+
+                       if (!first) {
+                               first = desc;
+                       } else {
+                               prev->lli.llp = desc->txd.phys;
+                               dma_sync_single_for_device(chan->dev.parent,
+                                               prev->txd.phys,
+                                               sizeof(prev->lli),
+                                               DMA_TO_DEVICE);
+                               list_add_tail(&desc->desc_node,
+                                               &first->txd.tx_list);
+                       }
+                       prev = desc;
+                       total_len += len;
+               }
+               break;
+       case DMA_FROM_DEVICE:
+               ctllo = (DWC_DEFAULT_CTLLO
+                               | DWC_CTLL_SRC_WIDTH(reg_width)
+                               | DWC_CTLL_DST_INC
+                               | DWC_CTLL_SRC_FIX
+                               | DWC_CTLL_FC_P2M);
+
+               reg = dws->slave.rx_reg;
+               for_each_sg(sgl, sg, sg_len, i) {
+                       struct dw_desc  *desc;
+                       u32             len;
+                       u32             mem;
+
+                       desc = dwc_desc_get(dwc);
+                       if (!desc) {
+                               dev_err(&chan->dev,
+                                       "not enough descriptors available\n");
+                               goto err_desc_get;
+                       }
+
+                       mem = sg_phys(sg);
+                       len = sg_dma_len(sg);
+                       mem_width = 2;
+                       if (unlikely(mem & 3 || len & 3))
+                               mem_width = 0;
+
+                       desc->lli.sar = reg;
+                       desc->lli.dar = mem;
+                       desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
+                       desc->lli.ctlhi = len >> reg_width;
+
+                       if (!first) {
+                               first = desc;
+                       } else {
+                               prev->lli.llp = desc->txd.phys;
+                               dma_sync_single_for_device(chan->dev.parent,
+                                               prev->txd.phys,
+                                               sizeof(prev->lli),
+                                               DMA_TO_DEVICE);
+                               list_add_tail(&desc->desc_node,
+                                               &first->txd.tx_list);
+                       }
+                       prev = desc;
+                       total_len += len;
+               }
+               break;
+       default:
+               return NULL;
+       }
+
+       if (flags & DMA_PREP_INTERRUPT)
+               /* Trigger interrupt after last block */
+               prev->lli.ctllo |= DWC_CTLL_INT_EN;
+
+       prev->lli.llp = 0;
+       dma_sync_single_for_device(chan->dev.parent,
+                       prev->txd.phys, sizeof(prev->lli),
+                       DMA_TO_DEVICE);
+
+       first->len = total_len;
+
+       return &first->txd;
+
+err_desc_get:
+       dwc_desc_put(dwc, first);
+       return NULL;
+}
+
+static void dwc_terminate_all(struct dma_chan *chan)
+{
+       struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
+       struct dw_dma           *dw = to_dw_dma(chan->device);
+       struct dw_desc          *desc, *_desc;
+       LIST_HEAD(list);
+
+       /*
+        * This is only called when something went wrong elsewhere, so
+        * we don't really care about the data. Just disable the
+        * channel. We still have to poll the channel enable bit due
+        * to AHB/HSB limitations.
+        */
+       spin_lock_bh(&dwc->lock);
+
+       channel_clear_bit(dw, CH_EN, dwc->mask);
+
+       while (dma_readl(dw, CH_EN) & dwc->mask)
+               cpu_relax();
+
+       /* active_list entries will end up before queued entries */
+       list_splice_init(&dwc->queue, &list);
+       list_splice_init(&dwc->active_list, &list);
+
+       spin_unlock_bh(&dwc->lock);
+
+       /* Flush all pending and queued descriptors */
+       list_for_each_entry_safe(desc, _desc, &list, desc_node)
+               dwc_descriptor_complete(dwc, desc);
+}
+
+static enum dma_status
+dwc_is_tx_complete(struct dma_chan *chan,
+               dma_cookie_t cookie,
+               dma_cookie_t *done, dma_cookie_t *used)
+{
+       struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
+       dma_cookie_t            last_used;
+       dma_cookie_t            last_complete;
+       int                     ret;
+
+       last_complete = dwc->completed;
+       last_used = chan->cookie;
+
+       ret = dma_async_is_complete(cookie, last_complete, last_used);
+       if (ret != DMA_SUCCESS) {
+               dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+
+               last_complete = dwc->completed;
+               last_used = chan->cookie;
+
+               ret = dma_async_is_complete(cookie, last_complete, last_used);
+       }
+
+       if (done)
+               *done = last_complete;
+       if (used)
+               *used = last_used;
+
+       return ret;
+}
+
+static void dwc_issue_pending(struct dma_chan *chan)
+{
+       struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
+
+       spin_lock_bh(&dwc->lock);
+       if (!list_empty(&dwc->queue))
+               dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+       spin_unlock_bh(&dwc->lock);
+}
+
+static int dwc_alloc_chan_resources(struct dma_chan *chan,
+               struct dma_client *client)
+{
+       struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
+       struct dw_dma           *dw = to_dw_dma(chan->device);
+       struct dw_desc          *desc;
+       struct dma_slave        *slave;
+       struct dw_dma_slave     *dws;
+       int                     i;
+       u32                     cfghi;
+       u32                     cfglo;
+
+       dev_vdbg(&chan->dev, "alloc_chan_resources\n");
+
+       /* Channels doing slave DMA can only handle one client. */
+       if (dwc->dws || client->slave) {
+               if (chan->client_count)
+                       return -EBUSY;
+       }
+
+       /* ASSERT:  channel is idle */
+       if (dma_readl(dw, CH_EN) & dwc->mask) {
+               dev_dbg(&chan->dev, "DMA channel not idle?\n");
+               return -EIO;
+       }
+
+       dwc->completed = chan->cookie = 1;
+
+       cfghi = DWC_CFGH_FIFO_MODE;
+       cfglo = 0;
+
+       slave = client->slave;
+       if (slave) {
+               /*
+                * We need controller-specific data to set up slave
+                * transfers.
+                */
+               BUG_ON(!slave->dma_dev || slave->dma_dev != dw->dma.dev);
+
+               dws = container_of(slave, struct dw_dma_slave, slave);
+
+               dwc->dws = dws;
+               cfghi = dws->cfg_hi;
+               cfglo = dws->cfg_lo;
+       } else {
+               dwc->dws = NULL;
+       }
+
+       channel_writel(dwc, CFG_LO, cfglo);
+       channel_writel(dwc, CFG_HI, cfghi);
+
+       /*
+        * NOTE: some controllers may have additional features that we
+        * need to initialize here, like "scatter-gather" (which
+        * doesn't mean what you think it means), and status writeback.
+        */
+
+       spin_lock_bh(&dwc->lock);
+       i = dwc->descs_allocated;
+       while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
+               spin_unlock_bh(&dwc->lock);
+
+               desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL);
+               if (!desc) {
+                       dev_info(&chan->dev,
+                               "only allocated %d descriptors\n", i);
+                       spin_lock_bh(&dwc->lock);
+                       break;
+               }
+
+               dma_async_tx_descriptor_init(&desc->txd, chan);
+               desc->txd.tx_submit = dwc_tx_submit;
+               desc->txd.flags = DMA_CTRL_ACK;
+               INIT_LIST_HEAD(&desc->txd.tx_list);
+               desc->txd.phys = dma_map_single(chan->dev.parent, &desc->lli,
+                               sizeof(desc->lli), DMA_TO_DEVICE);
+               dwc_desc_put(dwc, desc);
+
+               spin_lock_bh(&dwc->lock);
+               i = ++dwc->descs_allocated;
+       }
+
+       /* Enable interrupts */
+       channel_set_bit(dw, MASK.XFER, dwc->mask);
+       channel_set_bit(dw, MASK.BLOCK, dwc->mask);
+       channel_set_bit(dw, MASK.ERROR, dwc->mask);
+
+       spin_unlock_bh(&dwc->lock);
+
+       dev_dbg(&chan->dev,
+               "alloc_chan_resources allocated %d descriptors\n", i);
+
+       return i;
+}
+
+static void dwc_free_chan_resources(struct dma_chan *chan)
+{
+       struct dw_dma_chan      *dwc = to_dw_dma_chan(chan);
+       struct dw_dma           *dw = to_dw_dma(chan->device);
+       struct dw_desc          *desc, *_desc;
+       LIST_HEAD(list);
+
+       dev_dbg(&chan->dev, "free_chan_resources (descs allocated=%u)\n",
+                       dwc->descs_allocated);
+
+       /* ASSERT:  channel is idle */
+       BUG_ON(!list_empty(&dwc->active_list));
+       BUG_ON(!list_empty(&dwc->queue));
+       BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
+
+       spin_lock_bh(&dwc->lock);
+       list_splice_init(&dwc->free_list, &list);
+       dwc->descs_allocated = 0;
+       dwc->dws = NULL;
+
+       /* Disable interrupts */
+       channel_clear_bit(dw, MASK.XFER, dwc->mask);
+       channel_clear_bit(dw, MASK.BLOCK, dwc->mask);
+       channel_clear_bit(dw, MASK.ERROR, dwc->mask);
+
+       spin_unlock_bh(&dwc->lock);
+
+       list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+               dev_vdbg(&chan->dev, "  freeing descriptor %p\n", desc);
+               dma_unmap_single(chan->dev.parent, desc->txd.phys,
+                               sizeof(desc->lli), DMA_TO_DEVICE);
+               kfree(desc);
+       }
+
+       dev_vdbg(&chan->dev, "free_chan_resources done\n");
+}
+
+/*----------------------------------------------------------------------*/
+
+static void dw_dma_off(struct dw_dma *dw)
+{
+       dma_writel(dw, CFG, 0);
+
+       channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+       channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+       channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+       channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+       channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+       while (dma_readl(dw, CFG) & DW_CFG_DMA_EN)
+               cpu_relax();
+}
+
+static int __init dw_probe(struct platform_device *pdev)
+{
+       struct dw_dma_platform_data *pdata;
+       struct resource         *io;
+       struct dw_dma           *dw;
+       size_t                  size;
+       int                     irq;
+       int                     err;
+       int                     i;
+
+       pdata = pdev->dev.platform_data;
+       if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS)
+               return -EINVAL;
+
+       io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!io)
+               return -EINVAL;
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0)
+               return irq;
+
+       size = sizeof(struct dw_dma);
+       size += pdata->nr_channels * sizeof(struct dw_dma_chan);
+       dw = kzalloc(size, GFP_KERNEL);
+       if (!dw)
+               return -ENOMEM;
+
+       if (!request_mem_region(io->start, DW_REGLEN, pdev->dev.driver->name)) {
+               err = -EBUSY;
+               goto err_kfree;
+       }
+
+       memset(dw, 0, sizeof *dw);
+
+       dw->regs = ioremap(io->start, DW_REGLEN);
+       if (!dw->regs) {
+               err = -ENOMEM;
+               goto err_release_r;
+       }
+
+       dw->clk = clk_get(&pdev->dev, "hclk");
+       if (IS_ERR(dw->clk)) {
+               err = PTR_ERR(dw->clk);
+               goto err_clk;
+       }
+       clk_enable(dw->clk);
+
+       /* force dma off, just in case */
+       dw_dma_off(dw);
+
+       err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw);
+       if (err)
+               goto err_irq;
+
+       platform_set_drvdata(pdev, dw);
+
+       tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);
+
+       dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
+       INIT_LIST_HEAD(&dw->dma.channels);
+       for (i = 0; i < pdata->nr_channels; i++, dw->dma.chancnt++) {
+               struct dw_dma_chan      *dwc = &dw->chan[i];
+
+               dwc->chan.device = &dw->dma;
+               dwc->chan.cookie = dwc->completed = 1;
+               dwc->chan.chan_id = i;
+               list_add_tail(&dwc->chan.device_node, &dw->dma.channels);
+
+               dwc->ch_regs = &__dw_regs(dw)->CHAN[i];
+               spin_lock_init(&dwc->lock);
+               dwc->mask = 1 << i;
+
+               INIT_LIST_HEAD(&dwc->active_list);
+               INIT_LIST_HEAD(&dwc->queue);
+               INIT_LIST_HEAD(&dwc->free_list);
+
+               channel_clear_bit(dw, CH_EN, dwc->mask);
+       }
+
+       /* Clear/disable all interrupts on all channels. */
+       dma_writel(dw, CLEAR.XFER, dw->all_chan_mask);
+       dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask);
+       dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask);
+       dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
+       dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
+
+       channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+       channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+       channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+       channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+       channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+       dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+       dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
+       dw->dma.dev = &pdev->dev;
+       dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
+       dw->dma.device_free_chan_resources = dwc_free_chan_resources;
+
+       dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy;
+
+       dw->dma.device_prep_slave_sg = dwc_prep_slave_sg;
+       dw->dma.device_terminate_all = dwc_terminate_all;
+
+       dw->dma.device_is_tx_complete = dwc_is_tx_complete;
+       dw->dma.device_issue_pending = dwc_issue_pending;
+
+       dma_writel(dw, CFG, DW_CFG_DMA_EN);
+
+       printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n",
+                       pdev->dev.bus_id, dw->dma.chancnt);
+
+       dma_async_device_register(&dw->dma);
+
+       return 0;
+
+err_irq:
+       clk_disable(dw->clk);
+       clk_put(dw->clk);
+err_clk:
+       iounmap(dw->regs);
+       dw->regs = NULL;
+err_release_r:
+       release_resource(io);
+err_kfree:
+       kfree(dw);
+       return err;
+}
+
+static int __exit dw_remove(struct platform_device *pdev)
+{
+       struct dw_dma           *dw = platform_get_drvdata(pdev);
+       struct dw_dma_chan      *dwc, *_dwc;
+       struct resource         *io;
+
+       dw_dma_off(dw);
+       dma_async_device_unregister(&dw->dma);
+
+       free_irq(platform_get_irq(pdev, 0), dw);
+       tasklet_kill(&dw->tasklet);
+
+       list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels,
+                       chan.device_node) {
+               list_del(&dwc->chan.device_node);
+               channel_clear_bit(dw, CH_EN, dwc->mask);
+       }
+
+       clk_disable(dw->clk);
+       clk_put(dw->clk);
+
+       iounmap(dw->regs);
+       dw->regs = NULL;
+
+       io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       release_mem_region(io->start, DW_REGLEN);
+
+       kfree(dw);
+
+       return 0;
+}
+
+static void dw_shutdown(struct platform_device *pdev)
+{
+       struct dw_dma   *dw = platform_get_drvdata(pdev);
+
+       dw_dma_off(platform_get_drvdata(pdev));
+       clk_disable(dw->clk);
+}
+
+static int dw_suspend_late(struct platform_device *pdev, pm_message_t mesg)
+{
+       struct dw_dma   *dw = platform_get_drvdata(pdev);
+
+       dw_dma_off(platform_get_drvdata(pdev));
+       clk_disable(dw->clk);
+       return 0;
+}
+
+static int dw_resume_early(struct platform_device *pdev)
+{
+       struct dw_dma   *dw = platform_get_drvdata(pdev);
+
+       clk_enable(dw->clk);
+       dma_writel(dw, CFG, DW_CFG_DMA_EN);
+       return 0;
+}
+
+static struct platform_driver dw_driver = {
+       .remove         = __exit_p(dw_remove),
+       .shutdown       = dw_shutdown,
+       .suspend_late   = dw_suspend_late,
+       .resume_early   = dw_resume_early,
+       .driver = {
+               .name   = "dw_dmac",
+       },
+};
+
+static int __init dw_init(void)
+{
+       return platform_driver_probe(&dw_driver, dw_probe);
+}
+module_init(dw_init);
+
+static void __exit dw_exit(void)
+{
+       platform_driver_unregister(&dw_driver);
+}
+module_exit(dw_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver");
+MODULE_AUTHOR("Haavard Skinnemoen <haavard.skinnemoen@atmel.com>");
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
new file mode 100644 (file)
index 0000000..00fdd18
--- /dev/null
@@ -0,0 +1,225 @@
+/*
+ * Driver for the Synopsys DesignWare AHB DMA Controller
+ *
+ * Copyright (C) 2005-2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dw_dmac.h>
+
+#define DW_DMA_MAX_NR_CHANNELS 8
+
+/*
+ * Redefine this macro to handle differences between 32- and 64-bit
+ * addressing, big vs. little endian, etc.
+ */
+#define DW_REG(name)           u32 name; u32 __pad_##name
+
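+/*
+ * As used below, DW_REG(SAR) expands to "u32 SAR; u32 __pad_SAR;" so
+ * every register occupies a full 8-byte slot, matching the 64-bit
+ * stride of the controller's register map.
+ */
+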
+/* Hardware register definitions. */
+struct dw_dma_chan_regs {
+       DW_REG(SAR);            /* Source Address Register */
+       DW_REG(DAR);            /* Destination Address Register */
+       DW_REG(LLP);            /* Linked List Pointer */
+       u32     CTL_LO;         /* Control Register Low */
+       u32     CTL_HI;         /* Control Register High */
+       DW_REG(SSTAT);
+       DW_REG(DSTAT);
+       DW_REG(SSTATAR);
+       DW_REG(DSTATAR);
+       u32     CFG_LO;         /* Configuration Register Low */
+       u32     CFG_HI;         /* Configuration Register High */
+       DW_REG(SGR);
+       DW_REG(DSR);
+};
+
+struct dw_dma_irq_regs {
+       DW_REG(XFER);
+       DW_REG(BLOCK);
+       DW_REG(SRC_TRAN);
+       DW_REG(DST_TRAN);
+       DW_REG(ERROR);
+};
+
+struct dw_dma_regs {
+       /* per-channel registers */
+       struct dw_dma_chan_regs CHAN[DW_DMA_MAX_NR_CHANNELS];
+
+       /* irq handling */
+       struct dw_dma_irq_regs  RAW;            /* r */
+       struct dw_dma_irq_regs  STATUS;         /* r (raw & mask) */
+       struct dw_dma_irq_regs  MASK;           /* rw (set = irq enabled) */
+       struct dw_dma_irq_regs  CLEAR;          /* w (ack, affects "raw") */
+
+       DW_REG(STATUS_INT);                     /* r */
+
+       /* software handshaking */
+       DW_REG(REQ_SRC);
+       DW_REG(REQ_DST);
+       DW_REG(SGL_REQ_SRC);
+       DW_REG(SGL_REQ_DST);
+       DW_REG(LAST_SRC);
+       DW_REG(LAST_DST);
+
+       /* miscellaneous */
+       DW_REG(CFG);
+       DW_REG(CH_EN);
+       DW_REG(ID);
+       DW_REG(TEST);
+
+       /* optional encoded params, 0x3c8..0x3f7 */
+};
+
+/* Bitfields in CTL_LO */
+#define DWC_CTLL_INT_EN                (1 << 0)        /* irqs enabled? */
+#define DWC_CTLL_DST_WIDTH(n)  ((n)<<1)        /* bytes per element */
+#define DWC_CTLL_SRC_WIDTH(n)  ((n)<<4)
+#define DWC_CTLL_DST_INC       (0<<7)          /* DAR update/not */
+#define DWC_CTLL_DST_DEC       (1<<7)
+#define DWC_CTLL_DST_FIX       (2<<7)
+#define DWC_CTLL_SRC_INC       (0<<9)          /* SAR update/not */
+#define DWC_CTLL_SRC_DEC       (1<<9)
+#define DWC_CTLL_SRC_FIX       (2<<9)
+#define DWC_CTLL_DST_MSIZE(n)  ((n)<<11)       /* burst, #elements */
+#define DWC_CTLL_SRC_MSIZE(n)  ((n)<<14)
+#define DWC_CTLL_S_GATH_EN     (1 << 17)       /* src gather, !FIX */
+#define DWC_CTLL_D_SCAT_EN     (1 << 18)       /* dst scatter, !FIX */
+#define DWC_CTLL_FC_M2M                (0 << 20)       /* mem-to-mem */
+#define DWC_CTLL_FC_M2P                (1 << 20)       /* mem-to-periph */
+#define DWC_CTLL_FC_P2M                (2 << 20)       /* periph-to-mem */
+#define DWC_CTLL_FC_P2P                (3 << 20)       /* periph-to-periph */
+/* plus 4 transfer types for peripheral-as-flow-controller */
+#define DWC_CTLL_DMS(n)                ((n)<<23)       /* dst master select */
+#define DWC_CTLL_SMS(n)                ((n)<<25)       /* src master select */
+#define DWC_CTLL_LLP_D_EN      (1 << 27)       /* dest block chain */
+#define DWC_CTLL_LLP_S_EN      (1 << 28)       /* src block chain */
+
+/* Bitfields in CTL_HI */
+#define DWC_CTLH_DONE          0x00001000
+#define DWC_CTLH_BLOCK_TS_MASK 0x00000fff
+
+/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/dw_dmac.h> */
+#define DWC_CFGL_CH_SUSP       (1 << 8)        /* pause xfer */
+#define DWC_CFGL_FIFO_EMPTY    (1 << 9)        /* pause xfer */
+#define DWC_CFGL_HS_DST                (1 << 10)       /* handshake w/dst */
+#define DWC_CFGL_HS_SRC                (1 << 11)       /* handshake w/src */
+#define DWC_CFGL_MAX_BURST(x)  ((x) << 20)
+#define DWC_CFGL_RELOAD_SAR    (1 << 30)
+#define DWC_CFGL_RELOAD_DAR    (1 << 31)
+
+/* Bitfields in CFG_HI. Platform-configurable bits are in <linux/dw_dmac.h> */
+#define DWC_CFGH_DS_UPD_EN     (1 << 5)
+#define DWC_CFGH_SS_UPD_EN     (1 << 6)
+
+/* Bitfields in SGR */
+#define DWC_SGR_SGI(x)         ((x) << 0)
+#define DWC_SGR_SGC(x)         ((x) << 20)
+
+/* Bitfields in DSR */
+#define DWC_DSR_DSI(x)         ((x) << 0)
+#define DWC_DSR_DSC(x)         ((x) << 20)
+
+/* Bitfields in CFG */
+#define DW_CFG_DMA_EN          (1 << 0)
+
+#define DW_REGLEN              0x400
+
+struct dw_dma_chan {
+       struct dma_chan         chan;
+       void __iomem            *ch_regs;
+       u8                      mask;
+
+       spinlock_t              lock;
+
+       /* these other elements are all protected by lock */
+       dma_cookie_t            completed;
+       struct list_head        active_list;
+       struct list_head        queue;
+       struct list_head        free_list;
+
+       struct dw_dma_slave     *dws;
+
+       unsigned int            descs_allocated;
+};
+
+static inline struct dw_dma_chan_regs __iomem *
+__dwc_regs(struct dw_dma_chan *dwc)
+{
+       return dwc->ch_regs;
+}
+
+#define channel_readl(dwc, name) \
+       __raw_readl(&(__dwc_regs(dwc)->name))
+#define channel_writel(dwc, name, val) \
+       __raw_writel((val), &(__dwc_regs(dwc)->name))
+
+static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
+{
+       return container_of(chan, struct dw_dma_chan, chan);
+}
+
+
+struct dw_dma {
+       struct dma_device       dma;
+       void __iomem            *regs;
+       struct tasklet_struct   tasklet;
+       struct clk              *clk;
+
+       u8                      all_chan_mask;
+
+       struct dw_dma_chan      chan[0];
+};
+
+static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
+{
+       return dw->regs;
+}
+
+#define dma_readl(dw, name) \
+       __raw_readl(&(__dw_regs(dw)->name))
+#define dma_writel(dw, name, val) \
+       __raw_writel((val), &(__dw_regs(dw)->name))
+
+#define channel_set_bit(dw, reg, mask) \
+       dma_writel(dw, reg, ((mask) << 8) | (mask))
+#define channel_clear_bit(dw, reg, mask) \
+       dma_writel(dw, reg, ((mask) << 8) | 0)
+
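+/*
+ * CH_EN and the MASK registers pair each channel bit in [7:0] with a
+ * write-enable bit in [15:8], so (mask << 8) | mask sets and
+ * (mask << 8) | 0 clears individual bits without a read-modify-write.
+ */
+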
+static inline struct dw_dma *to_dw_dma(struct dma_device *ddev)
+{
+       return container_of(ddev, struct dw_dma, dma);
+}
+
+/* LLI == Linked List Item; a.k.a. DMA block descriptor */
+struct dw_lli {
+       /* values that are not changed by hardware */
+       dma_addr_t      sar;
+       dma_addr_t      dar;
+       dma_addr_t      llp;            /* chain to next lli */
+       u32             ctllo;
+       /* values that may get written back: */
+       u32             ctlhi;
+       /* sstat and dstat can snapshot peripheral register state.
+        * silicon config may discard either or both...
+        */
+       u32             sstat;
+       u32             dstat;
+};
+
+struct dw_desc {
+       /* FIRST values the hardware uses */
+       struct dw_lli                   lli;
+
+       /* THEN values for driver housekeeping */
+       struct list_head                desc_node;
+       struct dma_async_tx_descriptor  txd;
+       size_t                          len;
+};
+
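+/*
+ * txd.phys is the DMA address of the embedded lli (mapped with
+ * dma_map_single() at allocation time), so chaining descriptors is just
+ * a matter of storing the next descriptor's txd.phys in prev->lli.llp.
+ */
+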
+static inline struct dw_desc *
+txd_to_dw_desc(struct dma_async_tx_descriptor *txd)
+{
+       return container_of(txd, struct dw_desc, txd);
+}
index 054eabffc185a893ff32ab51c01dfd876ab84b43..c0059ca5834075e70f3fc59512d9ff69617bee29 100644 (file)
@@ -366,7 +366,8 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
  *
  * Return - The number of descriptors allocated.
  */
-static int fsl_dma_alloc_chan_resources(struct dma_chan *chan)
+static int fsl_dma_alloc_chan_resources(struct dma_chan *chan,
+                                       struct dma_client *client)
 {
        struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
        LIST_HEAD(tmp_list);
@@ -809,8 +810,7 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan)
        if (!src) {
                dev_err(fsl_chan->dev,
                                "selftest: Cannot alloc memory for test!\n");
-               err = -ENOMEM;
-               goto out;
+               return -ENOMEM;
        }
 
        dest = src + test_size;
@@ -820,7 +820,7 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan)
 
        chan = &fsl_chan->common;
 
-       if (fsl_dma_alloc_chan_resources(chan) < 1) {
+       if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
                dev_err(fsl_chan->dev,
                                "selftest: Cannot alloc resources for DMA\n");
                err = -ENODEV;
@@ -842,13 +842,13 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan)
        if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
                dev_err(fsl_chan->dev, "selftest: Time out!\n");
                err = -ENODEV;
-               goto out;
+               goto free_resources;
        }
 
        /* Test free and re-alloc channel resources */
        fsl_dma_free_chan_resources(chan);
 
-       if (fsl_dma_alloc_chan_resources(chan) < 1) {
+       if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
                dev_err(fsl_chan->dev,
                                "selftest: Cannot alloc resources for DMA\n");
                err = -ENODEV;
@@ -927,8 +927,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
        if (!new_fsl_chan) {
                dev_err(&dev->dev, "No free memory for allocating "
                                "dma channels!\n");
-               err = -ENOMEM;
-               goto err;
+               return -ENOMEM;
        }
 
        /* get dma channel register base */
@@ -936,7 +935,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
        if (err) {
                dev_err(&dev->dev, "Can't get %s property 'reg'\n",
                                dev->node->full_name);
-               goto err;
+               goto err_no_reg;
        }
 
        new_fsl_chan->feature = *(u32 *)match->data;
@@ -958,7 +957,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
                dev_err(&dev->dev, "There is no %d channel!\n",
                                new_fsl_chan->id);
                err = -EINVAL;
-               goto err;
+               goto err_no_chan;
        }
        fdev->chan[new_fsl_chan->id] = new_fsl_chan;
        tasklet_init(&new_fsl_chan->tasklet, dma_do_tasklet,
@@ -997,23 +996,26 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
                if (err) {
                        dev_err(&dev->dev, "DMA channel %s request_irq error "
                                "with return %d\n", dev->node->full_name, err);
-                       goto err;
+                       goto err_no_irq;
                }
        }
 
        err = fsl_dma_self_test(new_fsl_chan);
        if (err)
-               goto err;
+               goto err_self_test;
 
        dev_info(&dev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
                                match->compatible, new_fsl_chan->irq);
 
        return 0;
-err:
-       dma_halt(new_fsl_chan);
-       iounmap(new_fsl_chan->reg_base);
+
+err_self_test:
        free_irq(new_fsl_chan->irq, new_fsl_chan);
+err_no_irq:
        list_del(&new_fsl_chan->common.device_node);
+err_no_chan:
+       iounmap(new_fsl_chan->reg_base);
+err_no_reg:
        kfree(new_fsl_chan);
        return err;
 }
@@ -1054,8 +1056,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
        fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL);
        if (!fdev) {
                dev_err(&dev->dev, "No enough memory for 'priv'\n");
-               err = -ENOMEM;
-               goto err;
+               return -ENOMEM;
        }
        fdev->dev = &dev->dev;
        INIT_LIST_HEAD(&fdev->common.channels);
@@ -1065,7 +1066,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
        if (err) {
                dev_err(&dev->dev, "Can't get %s property 'reg'\n",
                                dev->node->full_name);
-               goto err;
+               goto err_no_reg;
        }
 
        dev_info(&dev->dev, "Probe the Freescale DMA driver for %s "
@@ -1103,6 +1104,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 
 err:
        iounmap(fdev->reg_base);
+err_no_reg:
        kfree(fdev);
        return err;
 }
index 16e0fd8facfb8ca30aaa4427815f165b0f60317f..9b16a3af9a0af6a9a377d0f10048216ac0f86510 100644 (file)
@@ -47,6 +47,16 @@ static struct pci_device_id ioat_pci_tbl[] = {
 
        /* I/OAT v2 platforms */
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
+
+       /* I/OAT v3 platforms */
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
        { 0, }
 };
 
@@ -83,6 +93,11 @@ static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase)
                if (device->dma && ioat_dca_enabled)
                        device->dca = ioat2_dca_init(pdev, iobase);
                break;
+       case IOAT_VER_3_0:
+               device->dma = ioat_dma_probe(pdev, iobase);
+               if (device->dma && ioat_dca_enabled)
+                       device->dca = ioat3_dca_init(pdev, iobase);
+               break;
        default:
                err = -ENODEV;
                break;
index 9e922760b7ffc4b76688bf56652e72085827e7da..6cf622da0286481fccb32a73285c72c83fb6a57b 100644 (file)
 #include "ioatdma_registers.h"
 
 /*
- * Bit 16 of a tag map entry is the "valid" bit, if it is set then bits 0:15
+ * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
  * contain the bit number of the APIC ID to map into the DCA tag.  If the valid
  * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
  */
 #define DCA_TAG_MAP_VALID 0x80
 
+#define DCA3_TAG_MAP_BIT_TO_INV 0x80
+#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
+#define DCA3_TAG_MAP_LITERAL_VAL 0x1
+
+#define DCA_TAG_MAP_MASK 0xDF
+
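+/*
+ * Each tag map byte is decoded by ioat3_dca_get_tag() below: 0x40 set
+ * selects an APIC ID bit (entry & 0x3f), 0x80 set (with 0x40 clear)
+ * selects the inverse of an APIC ID bit, and otherwise bit 0 is used as
+ * a literal tag bit.  E.g. 0x42 copies APIC ID bit 2 into that position
+ * of the tag.
+ */
+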
 /*
  * "Legacy" DCA systems do not implement the DCA register set in the
  * I/OAT device.  Software needs direct support for their tag mappings.
@@ -95,6 +101,7 @@ struct ioat_dca_slot {
 };
 
 #define IOAT_DCA_MAX_REQ 6
+#define IOAT3_DCA_MAX_REQ 2
 
 struct ioat_dca_priv {
        void __iomem            *iobase;
@@ -171,7 +178,9 @@ static int ioat_dca_remove_requester(struct dca_provider *dca,
        return -ENODEV;
 }
 
-static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu)
+static u8 ioat_dca_get_tag(struct dca_provider *dca,
+                          struct device *dev,
+                          int cpu)
 {
        struct ioat_dca_priv *ioatdca = dca_priv(dca);
        int i, apic_id, bit, value;
@@ -193,10 +202,26 @@ static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu)
        return tag;
 }
 
+static int ioat_dca_dev_managed(struct dca_provider *dca,
+                               struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+
+       pdev = to_pci_dev(dev);
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == pdev)
+                       return 1;
+       }
+       return 0;
+}
+
 static struct dca_ops ioat_dca_ops = {
        .add_requester          = ioat_dca_add_requester,
        .remove_requester       = ioat_dca_remove_requester,
        .get_tag                = ioat_dca_get_tag,
+       .dev_managed            = ioat_dca_dev_managed,
 };
 
 
@@ -207,6 +232,8 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
        u8 *tag_map = NULL;
        int i;
        int err;
+       u8 version;
+       u8 max_requesters;
 
        if (!system_has_dca_enabled(pdev))
                return NULL;
@@ -237,15 +264,20 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
        if (tag_map == NULL)
                return NULL;
 
+       version = readb(iobase + IOAT_VER_OFFSET);
+       if (version == IOAT_VER_3_0)
+               max_requesters = IOAT3_DCA_MAX_REQ;
+       else
+               max_requesters = IOAT_DCA_MAX_REQ;
+
        dca = alloc_dca_provider(&ioat_dca_ops,
                        sizeof(*ioatdca) +
-                       (sizeof(struct ioat_dca_slot) * IOAT_DCA_MAX_REQ));
+                       (sizeof(struct ioat_dca_slot) * max_requesters));
        if (!dca)
                return NULL;
 
        ioatdca = dca_priv(dca);
-       ioatdca->max_requesters = IOAT_DCA_MAX_REQ;
-
+       ioatdca->max_requesters = max_requesters;
        ioatdca->dca_base = iobase + 0x54;
 
        /* copy over the APIC ID to DCA tag mapping */
@@ -323,11 +355,13 @@ static int ioat2_dca_remove_requester(struct dca_provider *dca,
        return -ENODEV;
 }
 
-static u8 ioat2_dca_get_tag(struct dca_provider *dca, int cpu)
+static u8 ioat2_dca_get_tag(struct dca_provider *dca,
+                           struct device *dev,
+                           int cpu)
 {
        u8 tag;
 
-       tag = ioat_dca_get_tag(dca, cpu);
+       tag = ioat_dca_get_tag(dca, dev, cpu);
        tag = (~tag) & 0x1F;
        return tag;
 }
@@ -336,6 +370,7 @@ static struct dca_ops ioat2_dca_ops = {
        .add_requester          = ioat2_dca_add_requester,
        .remove_requester       = ioat2_dca_remove_requester,
        .get_tag                = ioat2_dca_get_tag,
+       .dev_managed            = ioat_dca_dev_managed,
 };
 
 static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
@@ -425,3 +460,198 @@ struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
 
        return dca;
 }
+
+static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 id;
+       u16 global_req_table;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+       id = dcaid_from_pcidev(pdev);
+
+       if (ioatdca->requester_count == ioatdca->max_requesters)
+               return -ENODEV;
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == NULL) {
+                       /* found an empty slot */
+                       ioatdca->requester_count++;
+                       ioatdca->req_slots[i].pdev = pdev;
+                       ioatdca->req_slots[i].rid = id;
+                       global_req_table =
+                             readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+                       writel(id | IOAT_DCA_GREQID_VALID,
+                              ioatdca->iobase + global_req_table + (i * 4));
+                       return i;
+               }
+       }
+       /* Error, ioatdma->requester_count is out of whack */
+       return -EFAULT;
+}
+
+static int ioat3_dca_remove_requester(struct dca_provider *dca,
+                                     struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 global_req_table;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == pdev) {
+                       global_req_table =
+                             readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+                       writel(0, ioatdca->iobase + global_req_table + (i * 4));
+                       ioatdca->req_slots[i].pdev = NULL;
+                       ioatdca->req_slots[i].rid = 0;
+                       ioatdca->requester_count--;
+                       return i;
+               }
+       }
+       return -ENODEV;
+}
+
+static u8 ioat3_dca_get_tag(struct dca_provider *dca,
+                           struct device *dev,
+                           int cpu)
+{
+       u8 tag;
+
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       int i, apic_id, bit, value;
+       u8 entry;
+
+       tag = 0;
+       apic_id = cpu_physical_id(cpu);
+
+       for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+               entry = ioatdca->tag_map[i];
+               if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
+                       bit = entry &
+                               ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
+                       value = (apic_id & (1 << bit)) ? 1 : 0;
+               } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
+                       bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
+                       value = (apic_id & (1 << bit)) ? 0 : 1;
+               } else {
+                       value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
+               }
+               tag |= (value << i);
+       }
+
+       return tag;
+}
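/*
 * Stand-alone illustration of the tag construction above (not part of the
 * patch): each tag_map[] byte either selects an APIC-ID bit, selects and
 * inverts it, or supplies a literal bit.  The DCA3_TAG_MAP_* values and the
 * example map below are assumptions for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

#define DCA3_TAG_MAP_BIT_TO_SEL  0x40	/* assumed encoding */
#define DCA3_TAG_MAP_BIT_TO_INV  0x80	/* assumed encoding */
#define DCA3_TAG_MAP_LITERAL_VAL 0x01	/* assumed encoding */
#define IOAT_TAG_MAP_LEN         8

/* mirrors the per-entry decision made by ioat3_dca_get_tag() */
static uint8_t dca3_tag(const uint8_t *tag_map, int apic_id)
{
	uint8_t tag = 0;
	int i;

	for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
		uint8_t entry = tag_map[i];
		int bit, value;

		if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
			bit = entry & ~(DCA3_TAG_MAP_BIT_TO_SEL |
					DCA3_TAG_MAP_BIT_TO_INV);
			value = (apic_id & (1 << bit)) ? 1 : 0;
		} else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
			bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
			value = (apic_id & (1 << bit)) ? 0 : 1;
		} else {
			value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
		}
		tag |= value << i;
	}
	return tag;
}

int main(void)
{
	/* hypothetical map: tag bits 0-2 track APIC-ID bits 0-2 */
	uint8_t map[IOAT_TAG_MAP_LEN] = { 0x40, 0x41, 0x42, 0, 0, 0, 0, 0 };

	printf("tag for apic_id 5: 0x%02x\n", dca3_tag(map, 5)); /* 0x05 */
	return 0;
}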
+
+static struct dca_ops ioat3_dca_ops = {
+       .add_requester          = ioat3_dca_add_requester,
+       .remove_requester       = ioat3_dca_remove_requester,
+       .get_tag                = ioat3_dca_get_tag,
+       .dev_managed            = ioat_dca_dev_managed,
+};
+
+static int ioat3_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
+{
+       int slots = 0;
+       u32 req;
+       u16 global_req_table;
+
+       global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
+       if (global_req_table == 0)
+               return 0;
+
+       do {
+               req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+               slots++;
+       } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+       return slots;
+}
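/*
 * The slot count above is a walk of the global requester ID table until an
 * entry carries the LASTID marker (IOAT_DCA_GREQID_LASTID, 0x80000000 per
 * ioatdma_registers.h further down).  A stand-alone model of the same walk,
 * over an invented in-memory table instead of MMIO reads:
 */
#include <stdint.h>
#include <stdio.h>

#define IOAT_DCA_GREQID_LASTID 0x80000000u

static int count_dca_slots(const uint32_t *greq_table)
{
	int slots = 0;
	uint32_t req;

	do {
		req = greq_table[slots];
		slots++;
	} while ((req & IOAT_DCA_GREQID_LASTID) == 0);

	return slots;
}

int main(void)
{
	uint32_t table[] = { 0x0, 0x0, IOAT_DCA_GREQID_LASTID };

	printf("%d DCA slots\n", count_dca_slots(table)); /* prints 3 */
	return 0;
}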
+
+struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+       struct dca_provider *dca;
+       struct ioat_dca_priv *ioatdca;
+       int slots;
+       int i;
+       int err;
+       u16 dca_offset;
+       u16 csi_fsb_control;
+       u16 pcie_control;
+       u8 bit;
+
+       union {
+               u64 full;
+               struct {
+                       u32 low;
+                       u32 high;
+               };
+       } tag_map;
+
+       if (!system_has_dca_enabled(pdev))
+               return NULL;
+
+       dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+       if (dca_offset == 0)
+               return NULL;
+
+       slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
+       if (slots == 0)
+               return NULL;
+
+       dca = alloc_dca_provider(&ioat3_dca_ops,
+                                sizeof(*ioatdca)
+                                     + (sizeof(struct ioat_dca_slot) * slots));
+       if (!dca)
+               return NULL;
+
+       ioatdca = dca_priv(dca);
+       ioatdca->iobase = iobase;
+       ioatdca->dca_base = iobase + dca_offset;
+       ioatdca->max_requesters = slots;
+
+       /* some bios might not know to turn these on */
+       csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+       if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
+               csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
+               writew(csi_fsb_control,
+                      ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+       }
+       pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+       if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
+               pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
+               writew(pcie_control,
+                      ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+       }
+
+       /* TODO version, compatibility and configuration checks */
+
+       /* copy out the APIC to DCA tag map */
+       tag_map.low =
+               readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
+       tag_map.high =
+               readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
+       for (i = 0; i < 8; i++) {
+               bit = tag_map.full >> (8 * i);
+               ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
+       }
+
+       err = register_dca_provider(dca, &pdev->dev);
+       if (err) {
+               free_dca_provider(dca);
+               return NULL;
+       }
+
+       return dca;
+}
index 318e8a22d81423a4da8cfb759be3c9854148252e..a52156e568867e827c30f0008de9477df493b730 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/dmaengine.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
 #include "ioatdma.h"
 #include "ioatdma_registers.h"
 #include "ioatdma_hw.h"
 #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
 #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
 
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
 static int ioat_pending_level = 4;
 module_param(ioat_pending_level, int, 0644);
 MODULE_PARM_DESC(ioat_pending_level,
                 "high-water mark for pushing ioat descriptors (default: 4)");
 
+#define RESET_DELAY  msecs_to_jiffies(100)
+#define WATCHDOG_DELAY  round_jiffies(msecs_to_jiffies(2000))
+static void ioat_dma_chan_reset_part2(struct work_struct *work);
+static void ioat_dma_chan_watchdog(struct work_struct *work);
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
 /* internal functions */
 static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
 static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
@@ -122,6 +135,38 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
        int i;
        struct ioat_dma_chan *ioat_chan;
 
+       /*
+        * IOAT ver.3 workarounds
+        */
+       if (device->version == IOAT_VER_3_0) {
+               u32 chan_err_mask;
+               u16 dev_id;
+               u32 dmauncerrsts;
+
+               /*
+                * Write CHANERRMSK_INT with 3E07h to mask out the errors
+                * that can cause stability issues for IOAT ver.3
+                */
+               chan_err_mask = 0x3E07;
+               pci_write_config_dword(device->pdev,
+                       IOAT_PCI_CHANERRMASK_INT_OFFSET,
+                       chan_err_mask);
+
+               /*
+                * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+                * (workaround for spurious config parity error after restart)
+                */
+               pci_read_config_word(device->pdev,
+                       IOAT_PCI_DEVICE_ID_OFFSET,
+                       &dev_id);
+               if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
+                       dmauncerrsts = 0x10;
+                       pci_write_config_dword(device->pdev,
+                               IOAT_PCI_DMAUNCERRSTS_OFFSET,
+                               dmauncerrsts);
+               }
+       }
+
        device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
        xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
        xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
@@ -137,6 +182,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
                ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
                ioat_chan->xfercap = xfercap;
                ioat_chan->desccount = 0;
+               INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
                if (ioat_chan->device->version != IOAT_VER_1_2) {
                        writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE
                                        | IOAT_DMA_DCA_ANY_CPU,
@@ -175,7 +221,7 @@ static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
 {
        struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
 
-       if (ioat_chan->pending != 0) {
+       if (ioat_chan->pending > 0) {
                spin_lock_bh(&ioat_chan->desc_lock);
                __ioat1_dma_memcpy_issue_pending(ioat_chan);
                spin_unlock_bh(&ioat_chan->desc_lock);
@@ -194,13 +240,228 @@ static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
 {
        struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
 
-       if (ioat_chan->pending != 0) {
+       if (ioat_chan->pending > 0) {
                spin_lock_bh(&ioat_chan->desc_lock);
                __ioat2_dma_memcpy_issue_pending(ioat_chan);
                spin_unlock_bh(&ioat_chan->desc_lock);
        }
 }
 
+
+/**
+ * ioat_dma_chan_reset_part2 - reinit the channel after a reset
+ */
+static void ioat_dma_chan_reset_part2(struct work_struct *work)
+{
+       struct ioat_dma_chan *ioat_chan =
+               container_of(work, struct ioat_dma_chan, work.work);
+       struct ioat_desc_sw *desc;
+
+       spin_lock_bh(&ioat_chan->cleanup_lock);
+       spin_lock_bh(&ioat_chan->desc_lock);
+
+       ioat_chan->completion_virt->low = 0;
+       ioat_chan->completion_virt->high = 0;
+       ioat_chan->pending = 0;
+
+       /*
+        * count the descriptors waiting, and be sure to do it
+        * right for both the CB1 line and the CB2 ring
+        */
+       ioat_chan->dmacount = 0;
+       if (ioat_chan->used_desc.prev) {
+               desc = to_ioat_desc(ioat_chan->used_desc.prev);
+               do {
+                       ioat_chan->dmacount++;
+                       desc = to_ioat_desc(desc->node.next);
+               } while (&desc->node != ioat_chan->used_desc.next);
+       }
+
+       /*
+        * write the new starting descriptor address
+        * this puts channel engine into ARMED state
+        */
+       desc = to_ioat_desc(ioat_chan->used_desc.prev);
+       switch (ioat_chan->device->version) {
+       case IOAT_VER_1_2:
+               writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+                      ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+               writel(((u64) desc->async_tx.phys) >> 32,
+                      ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+
+               writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
+                       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+               break;
+       case IOAT_VER_2_0:
+               writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+                      ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+               writel(((u64) desc->async_tx.phys) >> 32,
+                      ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+
+               /* tell the engine to go with what's left to be done */
+               writew(ioat_chan->dmacount,
+                      ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+
+               break;
+       }
+       dev_err(&ioat_chan->device->pdev->dev,
+               "chan%d reset - %d descs waiting, %d total desc\n",
+               chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
+
+       spin_unlock_bh(&ioat_chan->desc_lock);
+       spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+/**
+ * ioat_dma_reset_channel - restart a channel
+ * @ioat_chan: IOAT DMA channel handle
+ */
+static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
+{
+       u32 chansts, chanerr;
+
+       if (!ioat_chan->used_desc.prev)
+               return;
+
+       chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+       chansts = (ioat_chan->completion_virt->low
+                                       & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
+       if (chanerr) {
+               dev_err(&ioat_chan->device->pdev->dev,
+                       "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
+                       chan_num(ioat_chan), chansts, chanerr);
+               writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+       }
+
+       /*
+        * whack it upside the head with a reset
+        * and wait for things to settle out.
+        * force the pending count to a really big negative
+        * to make sure no one forces an issue_pending
+        * while we're waiting.
+        */
+
+       spin_lock_bh(&ioat_chan->desc_lock);
+       ioat_chan->pending = INT_MIN;
+       writeb(IOAT_CHANCMD_RESET,
+              ioat_chan->reg_base
+              + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+       spin_unlock_bh(&ioat_chan->desc_lock);
+
+       /* schedule the 2nd half instead of sleeping a long time */
+       schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
+}
+
+/**
+ * ioat_dma_chan_watchdog - watch for stuck channels
+ */
+static void ioat_dma_chan_watchdog(struct work_struct *work)
+{
+       struct ioatdma_device *device =
+               container_of(work, struct ioatdma_device, work.work);
+       struct ioat_dma_chan *ioat_chan;
+       int i;
+
+       union {
+               u64 full;
+               struct {
+                       u32 low;
+                       u32 high;
+               };
+       } completion_hw;
+       unsigned long compl_desc_addr_hw;
+
+       for (i = 0; i < device->common.chancnt; i++) {
+               ioat_chan = ioat_lookup_chan_by_index(device, i);
+
+               if (ioat_chan->device->version == IOAT_VER_1_2
+                       /* have we started processing anything yet */
+                   && ioat_chan->last_completion
+                       /* have we completed any since last watchdog cycle? */
+                   && (ioat_chan->last_completion ==
+                               ioat_chan->watchdog_completion)
+                       /* has TCP stuck on one cookie since last watchdog? */
+                   && (ioat_chan->watchdog_tcp_cookie ==
+                               ioat_chan->watchdog_last_tcp_cookie)
+                   && (ioat_chan->watchdog_tcp_cookie !=
+                               ioat_chan->completed_cookie)
+                       /* is there something in the chain to be processed? */
+                       /* CB1 chain always has at least the last one processed */
+                   && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
+                   && ioat_chan->pending == 0) {
+
+                       /*
+                        * check CHANSTS register for completed
+                        * descriptor address.
+                        * if it is different than completion writeback,
+                        * it is not zero
+                        * and it has changed since the last watchdog
+                        *     we can assume that channel
+                        *     is still working correctly
+                        *     and the problem is in completion writeback.
+                        *     update completion writeback
+                        *     with actual CHANSTS value
+                        * else
+                        *     try resetting the channel
+                        */
+
+                       completion_hw.low = readl(ioat_chan->reg_base +
+                               IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
+                       completion_hw.high = readl(ioat_chan->reg_base +
+                               IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
+#if (BITS_PER_LONG == 64)
+                       compl_desc_addr_hw =
+                               completion_hw.full
+                               & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+#else
+                       compl_desc_addr_hw =
+                               completion_hw.low & IOAT_LOW_COMPLETION_MASK;
+#endif
+
+                       if ((compl_desc_addr_hw != 0)
+                          && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
+                          && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
+                               ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
+                               ioat_chan->completion_virt->low = completion_hw.low;
+                               ioat_chan->completion_virt->high = completion_hw.high;
+                       } else {
+                               ioat_dma_reset_channel(ioat_chan);
+                               ioat_chan->watchdog_completion = 0;
+                               ioat_chan->last_compl_desc_addr_hw = 0;
+                       }
+
+               /*
+                * for version 2.0 if there are descriptors yet to be processed
+                * and the last completed hasn't changed since the last watchdog
+                *      if they haven't hit the pending level
+                *          issue the pending to push them through
+                *      else
+                *          try resetting the channel
+                */
+               } else if (ioat_chan->device->version == IOAT_VER_2_0
+                   && ioat_chan->used_desc.prev
+                   && ioat_chan->last_completion
+                   && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
+
+                       if (ioat_chan->pending < ioat_pending_level)
+                               ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
+                       else {
+                               ioat_dma_reset_channel(ioat_chan);
+                               ioat_chan->watchdog_completion = 0;
+                       }
+               } else {
+                       ioat_chan->last_compl_desc_addr_hw = 0;
+                       ioat_chan->watchdog_completion
+                                       = ioat_chan->last_completion;
+               }
+
+               ioat_chan->watchdog_last_tcp_cookie =
+                       ioat_chan->watchdog_tcp_cookie;
+       }
+
+       schedule_delayed_work(&device->work, WATCHDOG_DELAY);
+}
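/*
 * Distillation of the ver.1.2 branch of the watchdog above (illustrative,
 * names hypothetical): the channel is reset only when the completed
 * descriptor address read from CHANSTS is zero, matches the writeback
 * value, or has not moved since the previous check; otherwise the
 * writeback copy is simply refreshed.
 */
#include <stdbool.h>

static bool cb1_channel_needs_reset(unsigned long compl_addr_hw,
				    unsigned long watchdog_completion,
				    unsigned long last_compl_addr_hw)
{
	if (compl_addr_hw != 0 &&
	    compl_addr_hw != watchdog_completion &&
	    compl_addr_hw != last_compl_addr_hw)
		return false;	/* progress seen: just refresh the writeback */

	return true;		/* no progress: reset the channel */
}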
+
 static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
 {
        struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
@@ -250,6 +511,13 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
                prev = new;
        } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
 
+       if (!new) {
+               dev_err(&ioat_chan->device->pdev->dev,
+                       "tx submit failed\n");
+               spin_unlock_bh(&ioat_chan->desc_lock);
+               return -ENOMEM;
+       }
+
        hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
        if (new->async_tx.callback) {
                hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
@@ -335,7 +603,14 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
                desc_count++;
        } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
 
-       hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+       if (!new) {
+               dev_err(&ioat_chan->device->pdev->dev,
+                       "tx submit failed\n");
+               spin_unlock_bh(&ioat_chan->desc_lock);
+               return -ENOMEM;
+       }
+
+       hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
        if (new->async_tx.callback) {
                hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
                if (first != new) {
@@ -406,6 +681,7 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
                desc_sw->async_tx.tx_submit = ioat1_tx_submit;
                break;
        case IOAT_VER_2_0:
+       case IOAT_VER_3_0:
                desc_sw->async_tx.tx_submit = ioat2_tx_submit;
                break;
        }
@@ -452,7 +728,8 @@ static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
  * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
  * @chan: the channel to be filled out
  */
-static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
+static int ioat_dma_alloc_chan_resources(struct dma_chan *chan,
+                                        struct dma_client *client)
 {
        struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
        struct ioat_desc_sw *desc;
@@ -555,6 +832,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
                }
                break;
        case IOAT_VER_2_0:
+       case IOAT_VER_3_0:
                list_for_each_entry_safe(desc, _desc,
                                         ioat_chan->free_desc.next, node) {
                        list_del(&desc->node);
@@ -585,6 +863,10 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
        ioat_chan->last_completion = ioat_chan->completion_addr = 0;
        ioat_chan->pending = 0;
        ioat_chan->dmacount = 0;
+       ioat_chan->watchdog_completion = 0;
+       ioat_chan->last_compl_desc_addr_hw = 0;
+       ioat_chan->watchdog_tcp_cookie =
+               ioat_chan->watchdog_last_tcp_cookie = 0;
 }
 
 /**
@@ -640,7 +922,8 @@ ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
 
                /* set up the noop descriptor */
                noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
-               noop_desc->hw->size = 0;
+               /* set size to non-zero value (channel returns error when size is 0) */
+               noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
                noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
                noop_desc->hw->src_addr = 0;
                noop_desc->hw->dst_addr = 0;
@@ -690,6 +973,7 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
                return ioat1_dma_get_next_descriptor(ioat_chan);
                break;
        case IOAT_VER_2_0:
+       case IOAT_VER_3_0:
                return ioat2_dma_get_next_descriptor(ioat_chan);
                break;
        }
@@ -716,8 +1000,12 @@ static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
                new->src = dma_src;
                new->async_tx.flags = flags;
                return &new->async_tx;
-       } else
+       } else {
+               dev_err(&ioat_chan->device->pdev->dev,
+                       "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
+                       chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
                return NULL;
+       }
 }
 
 static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
@@ -744,8 +1032,13 @@ static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
                new->src = dma_src;
                new->async_tx.flags = flags;
                return &new->async_tx;
-       } else
+       } else {
+               spin_unlock_bh(&ioat_chan->desc_lock);
+               dev_err(&ioat_chan->device->pdev->dev,
+                       "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
+                       chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
                return NULL;
+       }
 }
 
 static void ioat_dma_cleanup_tasklet(unsigned long data)
@@ -756,6 +1049,27 @@ static void ioat_dma_cleanup_tasklet(unsigned long data)
               chan->reg_base + IOAT_CHANCTRL_OFFSET);
 }
 
+static void
+ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
+{
+       /*
+        * yes we are unmapping both _page and _single
+        * alloc'd regions with unmap_page. Is this
+        * *really* that bad?
+        */
+       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP))
+               pci_unmap_page(ioat_chan->device->pdev,
+                               pci_unmap_addr(desc, dst),
+                               pci_unmap_len(desc, len),
+                               PCI_DMA_FROMDEVICE);
+
+       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP))
+               pci_unmap_page(ioat_chan->device->pdev,
+                               pci_unmap_addr(desc, src),
+                               pci_unmap_len(desc, len),
+                               PCI_DMA_TODEVICE);
+}
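/*
 * ioat_dma_unmap() honours the DMA_COMPL_SKIP_{SRC,DEST}_UNMAP flags added
 * elsewhere in this series, so a client that manages its own mappings can
 * keep them across the copy.  Rough sketch of how a client might ask for
 * that (hypothetical helper, not part of this patch):
 */
#include <linux/dmaengine.h>
#include <linux/errno.h>

static dma_cookie_t submit_copy_keep_mappings(struct dma_chan *chan,
					      dma_addr_t dst, dma_addr_t src,
					      size_t len)
{
	struct dma_async_tx_descriptor *tx;
	unsigned long flags = DMA_CTRL_ACK |
			      DMA_COMPL_SKIP_SRC_UNMAP |
			      DMA_COMPL_SKIP_DEST_UNMAP;

	/* the caller remains responsible for unmapping dst and src */
	tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len, flags);
	if (!tx)
		return -ENOMEM;

	return tx->tx_submit(tx);
}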
+
 /**
  * ioat_dma_memcpy_cleanup - cleanup up finished descriptors
  * @chan: ioat channel to be cleaned up
@@ -799,11 +1113,27 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
 
        if (phys_complete == ioat_chan->last_completion) {
                spin_unlock_bh(&ioat_chan->cleanup_lock);
+               /*
+                * perhaps we're stuck so hard that the watchdog can't go off?
+                * try to catch it after 2 seconds
+                */
+               if (ioat_chan->device->version != IOAT_VER_3_0) {
+                       if (time_after(jiffies,
+                                      ioat_chan->last_completion_time + WATCHDOG_DELAY)) {
+                               ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
+                               ioat_chan->last_completion_time = jiffies;
+                       }
+               }
                return;
        }
+       ioat_chan->last_completion_time = jiffies;
 
        cookie = 0;
-       spin_lock_bh(&ioat_chan->desc_lock);
+       if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
+               spin_unlock_bh(&ioat_chan->cleanup_lock);
+               return;
+       }
+
        switch (ioat_chan->device->version) {
        case IOAT_VER_1_2:
                list_for_each_entry_safe(desc, _desc,
@@ -816,21 +1146,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
                         */
                        if (desc->async_tx.cookie) {
                                cookie = desc->async_tx.cookie;
-
-                               /*
-                                * yes we are unmapping both _page and _single
-                                * alloc'd regions with unmap_page. Is this
-                                * *really* that bad?
-                                */
-                               pci_unmap_page(ioat_chan->device->pdev,
-                                               pci_unmap_addr(desc, dst),
-                                               pci_unmap_len(desc, len),
-                                               PCI_DMA_FROMDEVICE);
-                               pci_unmap_page(ioat_chan->device->pdev,
-                                               pci_unmap_addr(desc, src),
-                                               pci_unmap_len(desc, len),
-                                               PCI_DMA_TODEVICE);
-
+                               ioat_dma_unmap(ioat_chan, desc);
                                if (desc->async_tx.callback) {
                                        desc->async_tx.callback(desc->async_tx.callback_param);
                                        desc->async_tx.callback = NULL;
@@ -862,6 +1178,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
                }
                break;
        case IOAT_VER_2_0:
+       case IOAT_VER_3_0:
                /* has some other thread has already cleaned up? */
                if (ioat_chan->used_desc.prev == NULL)
                        break;
@@ -889,16 +1206,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
                                if (desc->async_tx.cookie) {
                                        cookie = desc->async_tx.cookie;
                                        desc->async_tx.cookie = 0;
-
-                                       pci_unmap_page(ioat_chan->device->pdev,
-                                                     pci_unmap_addr(desc, dst),
-                                                     pci_unmap_len(desc, len),
-                                                     PCI_DMA_FROMDEVICE);
-                                       pci_unmap_page(ioat_chan->device->pdev,
-                                                     pci_unmap_addr(desc, src),
-                                                     pci_unmap_len(desc, len),
-                                                     PCI_DMA_TODEVICE);
-
+                                       ioat_dma_unmap(ioat_chan, desc);
                                        if (desc->async_tx.callback) {
                                                desc->async_tx.callback(desc->async_tx.callback_param);
                                                desc->async_tx.callback = NULL;
@@ -943,6 +1251,7 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
 
        last_used = chan->cookie;
        last_complete = ioat_chan->completed_cookie;
+       ioat_chan->watchdog_tcp_cookie = cookie;
 
        if (done)
                *done = last_complete;
@@ -973,10 +1282,19 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
        spin_lock_bh(&ioat_chan->desc_lock);
 
        desc = ioat_dma_get_next_descriptor(ioat_chan);
+
+       if (!desc) {
+               dev_err(&ioat_chan->device->pdev->dev,
+                       "Unable to start null desc - get next desc failed\n");
+               spin_unlock_bh(&ioat_chan->desc_lock);
+               return;
+       }
+
        desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
                                | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
                                | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
-       desc->hw->size = 0;
+       /* set size to non-zero value (channel returns error when size is 0) */
+       desc->hw->size = NULL_DESC_BUFFER_SIZE;
        desc->hw->src_addr = 0;
        desc->hw->dst_addr = 0;
        async_tx_ack(&desc->async_tx);
@@ -994,6 +1312,7 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
                        + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
                break;
        case IOAT_VER_2_0:
+       case IOAT_VER_3_0:
                writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
                       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
                writel(((u64) desc->async_tx.phys) >> 32,
@@ -1049,7 +1368,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
        dma_chan = container_of(device->common.channels.next,
                                struct dma_chan,
                                device_node);
-       if (device->common.device_alloc_chan_resources(dma_chan) < 1) {
+       if (device->common.device_alloc_chan_resources(dma_chan, NULL) < 1) {
                dev_err(&device->pdev->dev,
                        "selftest cannot allocate chan resource\n");
                err = -ENODEV;
@@ -1312,6 +1631,7 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
                                                ioat1_dma_memcpy_issue_pending;
                break;
        case IOAT_VER_2_0:
+       case IOAT_VER_3_0:
                device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
                device->common.device_issue_pending =
                                                ioat2_dma_memcpy_issue_pending;
@@ -1331,8 +1651,16 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
        if (err)
                goto err_self_test;
 
+       ioat_set_tcp_copy_break(device);
+
        dma_async_device_register(&device->common);
 
+       if (device->version != IOAT_VER_3_0) {
+               INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
+               schedule_delayed_work(&device->work,
+                                     WATCHDOG_DELAY);
+       }
+
        return device;
 
 err_self_test:
@@ -1365,6 +1693,10 @@ void ioat_dma_remove(struct ioatdma_device *device)
        pci_release_regions(device->pdev);
        pci_disable_device(device->pdev);
 
+       if (device->version != IOAT_VER_3_0) {
+               cancel_delayed_work(&device->work);
+       }
+
        list_for_each_entry_safe(chan, _chan,
                                 &device->common.channels, device_node) {
                ioat_chan = to_ioat_chan(chan);
index f2c7fedbf009b545c72e13fe69cfae54117ed4b9..a3306d0e1372a44b2950bc06d220d9b716f0bb00 100644 (file)
@@ -27,8 +27,9 @@
 #include <linux/dmapool.h>
 #include <linux/cache.h>
 #include <linux/pci_ids.h>
+#include <net/tcp.h>
 
-#define IOAT_DMA_VERSION  "2.04"
+#define IOAT_DMA_VERSION  "3.30"
 
 enum ioat_interrupt {
        none = 0,
@@ -40,6 +41,7 @@ enum ioat_interrupt {
 
 #define IOAT_LOW_COMPLETION_MASK       0xffffffc0
 #define IOAT_DMA_DCA_ANY_CPU           ~0
+#define IOAT_WATCHDOG_PERIOD           (2 * HZ)
 
 
 /**
@@ -62,6 +64,7 @@ struct ioatdma_device {
        struct dma_device common;
        u8 version;
        enum ioat_interrupt irq_mode;
+       struct delayed_work work;
        struct msix_entry msix_entries[4];
        struct ioat_dma_chan *idx[4];
 };
@@ -75,6 +78,7 @@ struct ioat_dma_chan {
 
        dma_cookie_t completed_cookie;
        unsigned long last_completion;
+       unsigned long last_completion_time;
 
        size_t xfercap; /* XFERCAP register value expanded out */
 
@@ -82,6 +86,10 @@ struct ioat_dma_chan {
        spinlock_t desc_lock;
        struct list_head free_desc;
        struct list_head used_desc;
+       unsigned long watchdog_completion;
+       int watchdog_tcp_cookie;
+       u32 watchdog_last_tcp_cookie;
+       struct delayed_work work;
 
        int pending;
        int dmacount;
@@ -98,6 +106,7 @@ struct ioat_dma_chan {
                        u32 high;
                };
        } *completion_virt;
+       unsigned long last_compl_desc_addr_hw;
        struct tasklet_struct cleanup_task;
 };
 
@@ -121,17 +130,34 @@ struct ioat_desc_sw {
        struct dma_async_tx_descriptor async_tx;
 };
 
+static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
+{
+       #ifdef CONFIG_NET_DMA
+       switch (dev->version) {
+       case IOAT_VER_1_2:
+       case IOAT_VER_3_0:
+               sysctl_tcp_dma_copybreak = 4096;
+               break;
+       case IOAT_VER_2_0:
+               sysctl_tcp_dma_copybreak = 2048;
+               break;
+       }
+       #endif
+}
+
 #if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
 struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
                                      void __iomem *iobase);
 void ioat_dma_remove(struct ioatdma_device *device);
 struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
 struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
 #else
 #define ioat_dma_probe(pdev, iobase)    NULL
 #define ioat_dma_remove(device)         do { } while (0)
 #define ioat_dca_init(pdev, iobase)    NULL
 #define ioat2_dca_init(pdev, iobase)   NULL
+#define ioat3_dca_init(pdev, iobase)   NULL
 #endif
 
 #endif /* IOATDMA_H */
index dd470fa91d86a4e10d8187e63d095125fb5a8526..f1ae2c776f7487b40e695395e3562679f420b29e 100644 (file)
@@ -35,6 +35,7 @@
 #define IOAT_PCI_SID            0x8086
 #define IOAT_VER_1_2            0x12    /* Version 1.2 */
 #define IOAT_VER_2_0            0x20    /* Version 2.0 */
+#define IOAT_VER_3_0            0x30    /* Version 3.0 */
 
 struct ioat_dma_descriptor {
        uint32_t        size;
index 9832d7ebd931a0a76dafc6d66948b37e4a16da52..827cb503cac6979a0fa98e3fbd25a113e14fda27 100644 (file)
 #define IOAT_PCI_DMACTRL_DMA_EN                        0x00000001
 #define IOAT_PCI_DMACTRL_MSI_EN                        0x00000002
 
+#define IOAT_PCI_DEVICE_ID_OFFSET              0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET           0x148
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET                0x184
+
 /* MMIO Device Registers */
 #define IOAT_CHANCNT_OFFSET                    0x00    /*  8-bit */
 
 #define IOAT_DCA_GREQID_VALID       0x20000000
 #define IOAT_DCA_GREQID_LASTID      0x80000000
 
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET    0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH  0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET    0x0E
+#define IOAT3_PCI_CONTROL_MEMWR     0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
 
+#define IOAT3_DCA_GREQID_OFFSET     0x02
 
 #define IOAT1_CHAINADDR_OFFSET         0x0C    /* 64-bit Descriptor Chain Address Register */
 #define IOAT2_CHAINADDR_OFFSET         0x10    /* 64-bit Descriptor Chain Address Register */
index 0ec0f431e6a1d4bc30a179850ddadfbcec2d1ff3..85bfeba4d85eab01eb9e2c7b339eeccd674e2b0b 100644 (file)
@@ -82,17 +82,24 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
                        struct device *dev =
                                &iop_chan->device->pdev->dev;
                        u32 len = unmap->unmap_len;
-                       u32 src_cnt = unmap->unmap_src_cnt;
-                       dma_addr_t addr = iop_desc_get_dest_addr(unmap,
-                               iop_chan);
-
-                       dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
-                       while (src_cnt--) {
-                               addr = iop_desc_get_src_addr(unmap,
-                                                       iop_chan,
-                                                       src_cnt);
-                               dma_unmap_page(dev, addr, len,
-                                       DMA_TO_DEVICE);
+                       enum dma_ctrl_flags flags = desc->async_tx.flags;
+                       u32 src_cnt;
+                       dma_addr_t addr;
+
+                       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+                               addr = iop_desc_get_dest_addr(unmap, iop_chan);
+                               dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
+                       }
+
+                       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+                               src_cnt = unmap->unmap_src_cnt;
+                               while (src_cnt--) {
+                                       addr = iop_desc_get_src_addr(unmap,
+                                                                    iop_chan,
+                                                                    src_cnt);
+                                       dma_unmap_page(dev, addr, len,
+                                                      DMA_TO_DEVICE);
+                               }
                        }
                        desc->group_head = NULL;
                }
@@ -366,8 +373,8 @@ retry:
        if (!retry++)
                goto retry;
 
-       /* try to free some slots if the allocation fails */
-       tasklet_schedule(&iop_chan->irq_tasklet);
+       /* perform direct reclaim if the allocation fails */
+       __iop_adma_slot_cleanup(iop_chan);
 
        return NULL;
 }
@@ -443,8 +450,18 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
 static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
 static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
 
-/* returns the number of allocated descriptors */
-static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
+/**
+ * iop_adma_alloc_chan_resources - returns the number of allocated descriptors
+ * @chan - allocate descriptor resources for this channel
+ * @client - current client requesting the channel be ready for requests
+ *
+ * Note: We keep the slots for 1 operation on iop_chan->chain at all times.  To
+ * avoid deadlock via async_xor, num_descs_in_pool must at a minimum be
+ * greater than 2x the number of slots needed to satisfy a device->max_xor
+ * request.
+ */
+static int iop_adma_alloc_chan_resources(struct dma_chan *chan,
+                                        struct dma_client *client)
 {
        char *hw_desc;
        int idx;
@@ -838,7 +855,7 @@ static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
        dma_chan = container_of(device->common.channels.next,
                                struct dma_chan,
                                device_node);
-       if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+       if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) {
                err = -ENODEV;
                goto out;
        }
@@ -936,7 +953,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
        dma_chan = container_of(device->common.channels.next,
                                struct dma_chan,
                                device_node);
-       if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+       if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) {
                err = -ENODEV;
                goto out;
        }
@@ -1387,6 +1404,8 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
        spin_unlock_bh(&iop_chan->lock);
 }
 
+MODULE_ALIAS("platform:iop-adma");
+
 static struct platform_driver iop_adma_driver = {
        .probe          = iop_adma_probe,
        .remove         = iop_adma_remove,
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
new file mode 100644 (file)
index 0000000..a4e4494
--- /dev/null
@@ -0,0 +1,1375 @@
+/*
+ * offload engine driver for the Marvell XOR engine
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <asm/plat-orion/mv_xor.h>
+#include "mv_xor.h"
+
+static void mv_xor_issue_pending(struct dma_chan *chan);
+
+#define to_mv_xor_chan(chan)           \
+       container_of(chan, struct mv_xor_chan, common)
+
+#define to_mv_xor_device(dev)          \
+       container_of(dev, struct mv_xor_device, common)
+
+#define to_mv_xor_slot(tx)             \
+       container_of(tx, struct mv_xor_desc_slot, async_tx)
+
+static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
+{
+       struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+       hw_desc->status = (1 << 31);
+       hw_desc->phy_next_desc = 0;
+       hw_desc->desc_command = (1 << 31);
+}
+
+static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
+{
+       struct mv_xor_desc *hw_desc = desc->hw_desc;
+       return hw_desc->phy_dest_addr;
+}
+
+static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
+                               int src_idx)
+{
+       struct mv_xor_desc *hw_desc = desc->hw_desc;
+       return hw_desc->phy_src_addr[src_idx];
+}
+
+
+static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
+                                  u32 byte_count)
+{
+       struct mv_xor_desc *hw_desc = desc->hw_desc;
+       hw_desc->byte_count = byte_count;
+}
+
+static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
+                                 u32 next_desc_addr)
+{
+       struct mv_xor_desc *hw_desc = desc->hw_desc;
+       BUG_ON(hw_desc->phy_next_desc);
+       hw_desc->phy_next_desc = next_desc_addr;
+}
+
+static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
+{
+       struct mv_xor_desc *hw_desc = desc->hw_desc;
+       hw_desc->phy_next_desc = 0;
+}
+
+static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val)
+{
+       desc->value = val;
+}
+
+static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc,
+                                 dma_addr_t addr)
+{
+       struct mv_xor_desc *hw_desc = desc->hw_desc;
+       hw_desc->phy_dest_addr = addr;
+}
+
+static int mv_chan_memset_slot_count(size_t len)
+{
+       return 1;
+}
+
+#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
+
+static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
+                                int index, dma_addr_t addr)
+{
+       struct mv_xor_desc *hw_desc = desc->hw_desc;
+       hw_desc->phy_src_addr[index] = addr;
+       if (desc->type == DMA_XOR)
+               hw_desc->desc_command |= (1 << index);
+}
+
+static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
+{
+       return __raw_readl(XOR_CURR_DESC(chan));
+}
+
+static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
+                                       u32 next_desc_addr)
+{
+       __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
+}
+
+static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr)
+{
+       __raw_writel(desc_addr, XOR_DEST_POINTER(chan));
+}
+
+static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size)
+{
+       __raw_writel(block_size, XOR_BLOCK_SIZE(chan));
+}
+
+static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value)
+{
+       __raw_writel(value, XOR_INIT_VALUE_LOW(chan));
+       __raw_writel(value, XOR_INIT_VALUE_HIGH(chan));
+}
+
+static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
+{
+       u32 val = __raw_readl(XOR_INTR_MASK(chan));
+       val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
+       __raw_writel(val, XOR_INTR_MASK(chan));
+}
+
+static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
+{
+       u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
+       intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
+       return intr_cause;
+}
+
+static int mv_is_err_intr(u32 intr_cause)
+{
+       if (intr_cause & ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)))
+               return 1;
+
+       return 0;
+}
+
+static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
+{
+       u32 val = (1 << (1 + (chan->idx * 16)));
+       dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val);
+       __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
+{
+       u32 val = 0xFFFF0000 >> (chan->idx * 16);
+       __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static int mv_can_chain(struct mv_xor_desc_slot *desc)
+{
+       struct mv_xor_desc_slot *chain_old_tail = list_entry(
+               desc->chain_node.prev, struct mv_xor_desc_slot, chain_node);
+
+       if (chain_old_tail->type != desc->type)
+               return 0;
+       if (desc->type == DMA_MEMSET)
+               return 0;
+
+       return 1;
+}
+
+static void mv_set_mode(struct mv_xor_chan *chan,
+                              enum dma_transaction_type type)
+{
+       u32 op_mode;
+       u32 config = __raw_readl(XOR_CONFIG(chan));
+
+       switch (type) {
+       case DMA_XOR:
+               op_mode = XOR_OPERATION_MODE_XOR;
+               break;
+       case DMA_MEMCPY:
+               op_mode = XOR_OPERATION_MODE_MEMCPY;
+               break;
+       case DMA_MEMSET:
+               op_mode = XOR_OPERATION_MODE_MEMSET;
+               break;
+       default:
+               dev_printk(KERN_ERR, chan->device->common.dev,
+                          "error: unsupported operation %d.\n",
+                          type);
+               BUG();
+               return;
+       }
+
+       config &= ~0x7;
+       config |= op_mode;
+       __raw_writel(config, XOR_CONFIG(chan));
+       chan->current_type = type;
+}
+
+static void mv_chan_activate(struct mv_xor_chan *chan)
+{
+       u32 activation;
+
+       dev_dbg(chan->device->common.dev, " activate chan.\n");
+       activation = __raw_readl(XOR_ACTIVATION(chan));
+       activation |= 0x1;
+       __raw_writel(activation, XOR_ACTIVATION(chan));
+}
+
+static char mv_chan_is_busy(struct mv_xor_chan *chan)
+{
+       u32 state = __raw_readl(XOR_ACTIVATION(chan));
+
+       state = (state >> 4) & 0x3;
+
+       return (state == 1) ? 1 : 0;
+}
+
+static int mv_chan_xor_slot_count(size_t len, int src_cnt)
+{
+       return 1;
+}
+
+/**
+ * mv_xor_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
+                             struct mv_xor_desc_slot *slot)
+{
+       dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
+               __func__, __LINE__, slot);
+
+       slot->slots_per_op = 0;
+
+}
+
+/*
+ * mv_xor_start_new_chain - program the engine to operate on new chain headed by
+ * sw_desc
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
+                                  struct mv_xor_desc_slot *sw_desc)
+{
+       dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
+               __func__, __LINE__, sw_desc);
+       if (sw_desc->type != mv_chan->current_type)
+               mv_set_mode(mv_chan, sw_desc->type);
+
+       if (sw_desc->type == DMA_MEMSET) {
+               /* for memset requests we need to program the engine, no
+                * descriptors used.
+                */
+               struct mv_xor_desc *hw_desc = sw_desc->hw_desc;
+               mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr);
+               mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
+               mv_chan_set_value(mv_chan, sw_desc->value);
+       } else {
+               /* set the hardware chain */
+               mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
+       }
+       mv_chan->pending += sw_desc->slot_cnt;
+       mv_xor_issue_pending(&mv_chan->common);
+}
+
+static dma_cookie_t
+mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+       struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
+{
+       BUG_ON(desc->async_tx.cookie < 0);
+
+       if (desc->async_tx.cookie > 0) {
+               cookie = desc->async_tx.cookie;
+
+               /* call the callback (must not sleep or submit new
+                * operations to this channel)
+                */
+               if (desc->async_tx.callback)
+                       desc->async_tx.callback(
+                               desc->async_tx.callback_param);
+
+               /* unmap dma addresses
+                * (unmap_single vs unmap_page?)
+                */
+               if (desc->group_head && desc->unmap_len) {
+                       struct mv_xor_desc_slot *unmap = desc->group_head;
+                       struct device *dev =
+                               &mv_chan->device->pdev->dev;
+                       u32 len = unmap->unmap_len;
+                       enum dma_ctrl_flags flags = desc->async_tx.flags;
+                       u32 src_cnt;
+                       dma_addr_t addr;
+
+                       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+                               addr = mv_desc_get_dest_addr(unmap);
+                               dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
+                       }
+
+                       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+                               src_cnt = unmap->unmap_src_cnt;
+                               while (src_cnt--) {
+                                       addr = mv_desc_get_src_addr(unmap,
+                                                                   src_cnt);
+                                       dma_unmap_page(dev, addr, len,
+                                                      DMA_TO_DEVICE);
+                               }
+                       }
+                       desc->group_head = NULL;
+               }
+       }
+
+       /* run dependent operations */
+       async_tx_run_dependencies(&desc->async_tx);
+
+       return cookie;
+}
+
+static int
+mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
+{
+       struct mv_xor_desc_slot *iter, *_iter;
+
+       dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+       list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+                                completed_node) {
+
+               if (async_tx_test_ack(&iter->async_tx)) {
+                       list_del(&iter->completed_node);
+                       mv_xor_free_slots(mv_chan, iter);
+               }
+       }
+       return 0;
+}
+
+static int
+mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
+       struct mv_xor_chan *mv_chan)
+{
+       dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n",
+               __func__, __LINE__, desc, desc->async_tx.flags);
+       list_del(&desc->chain_node);
+       /* the client is allowed to attach dependent operations
+        * until 'ack' is set
+        */
+       if (!async_tx_test_ack(&desc->async_tx)) {
+               /* move this slot to the completed_slots */
+               list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
+               return 0;
+       }
+
+       mv_xor_free_slots(mv_chan, desc);
+       return 0;
+}
+
+static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+       struct mv_xor_desc_slot *iter, *_iter;
+       dma_cookie_t cookie = 0;
+       int busy = mv_chan_is_busy(mv_chan);
+       u32 current_desc = mv_chan_get_current_desc(mv_chan);
+       int seen_current = 0;
+
+       dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+       dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc);
+       mv_xor_clean_completed_slots(mv_chan);
+
+       /* free completed slots from the chain starting with
+        * the oldest descriptor
+        */
+
+       list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+                                       chain_node) {
+               prefetch(_iter);
+               prefetch(&_iter->async_tx);
+
+               /* do not advance past the current descriptor loaded into the
+                * hardware channel, subsequent descriptors are either in
+                * process or have not been submitted
+                */
+               if (seen_current)
+                       break;
+
+               /* stop the search if we reach the current descriptor and the
+                * channel is busy
+                */
+               if (iter->async_tx.phys == current_desc) {
+                       seen_current = 1;
+                       if (busy)
+                               break;
+               }
+
+               cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
+
+               if (mv_xor_clean_slot(iter, mv_chan))
+                       break;
+       }
+
+       if ((busy == 0) && !list_empty(&mv_chan->chain)) {
+               struct mv_xor_desc_slot *chain_head;
+               chain_head = list_entry(mv_chan->chain.next,
+                                       struct mv_xor_desc_slot,
+                                       chain_node);
+
+               mv_xor_start_new_chain(mv_chan, chain_head);
+       }
+
+       if (cookie > 0)
+               mv_chan->completed_cookie = cookie;
+}
+
+static void
+mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+       spin_lock_bh(&mv_chan->lock);
+       __mv_xor_slot_cleanup(mv_chan);
+       spin_unlock_bh(&mv_chan->lock);
+}
+
+static void mv_xor_tasklet(unsigned long data)
+{
+       struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
+       __mv_xor_slot_cleanup(chan);
+}
+
+static struct mv_xor_desc_slot *
+mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
+                   int slots_per_op)
+{
+       struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
+       LIST_HEAD(chain);
+       int slots_found, retry = 0;
+
+       /* start search from the last allocated descriptor
+        * if a contiguous allocation cannot be found start searching
+        * from the beginning of the list
+        */
+retry:
+       slots_found = 0;
+       if (retry == 0)
+               iter = mv_chan->last_used;
+       else
+               iter = list_entry(&mv_chan->all_slots,
+                       struct mv_xor_desc_slot,
+                       slot_node);
+
+       list_for_each_entry_safe_continue(
+               iter, _iter, &mv_chan->all_slots, slot_node) {
+               prefetch(_iter);
+               prefetch(&_iter->async_tx);
+               if (iter->slots_per_op) {
+                       /* give up after finding the first busy slot
+                        * on the second pass through the list
+                        */
+                       if (retry)
+                               break;
+
+                       slots_found = 0;
+                       continue;
+               }
+
+               /* start the allocation if the slot is correctly aligned */
+               if (!slots_found++)
+                       alloc_start = iter;
+
+               if (slots_found == num_slots) {
+                       struct mv_xor_desc_slot *alloc_tail = NULL;
+                       struct mv_xor_desc_slot *last_used = NULL;
+                       iter = alloc_start;
+                       while (num_slots) {
+                               int i;
+
+                               /* pre-ack all but the last descriptor */
+                               async_tx_ack(&iter->async_tx);
+
+                               list_add_tail(&iter->chain_node, &chain);
+                               alloc_tail = iter;
+                               iter->async_tx.cookie = 0;
+                               iter->slot_cnt = num_slots;
+                               iter->xor_check_result = NULL;
+                               for (i = 0; i < slots_per_op; i++) {
+                                       iter->slots_per_op = slots_per_op - i;
+                                       last_used = iter;
+                                       iter = list_entry(iter->slot_node.next,
+                                               struct mv_xor_desc_slot,
+                                               slot_node);
+                               }
+                               num_slots -= slots_per_op;
+                       }
+                       alloc_tail->group_head = alloc_start;
+                       alloc_tail->async_tx.cookie = -EBUSY;
+                       list_splice(&chain, &alloc_tail->async_tx.tx_list);
+                       mv_chan->last_used = last_used;
+                       mv_desc_clear_next_desc(alloc_start);
+                       mv_desc_clear_next_desc(alloc_tail);
+                       return alloc_tail;
+               }
+       }
+       if (!retry++)
+               goto retry;
+
+       /* try to free some slots if the allocation fails */
+       tasklet_schedule(&mv_chan->irq_tasklet);
+
+       return NULL;
+}
+
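+/* Note on cookie bookkeeping: dma_cookie_t is signed.  Positive values
+ * identify submitted transactions, zero means "cookie not yet assigned",
+ * and negative values carry error codes such as -EBUSY, which is why the
+ * helper below wraps the counter back to 1 (never 0 or below) on overflow.
+ */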
+static dma_cookie_t
+mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
+                     struct mv_xor_desc_slot *desc)
+{
+       dma_cookie_t cookie = mv_chan->common.cookie;
+
+       if (++cookie < 0)
+               cookie = 1;
+       mv_chan->common.cookie = desc->async_tx.cookie = cookie;
+       return cookie;
+}
+
+/************************ DMA engine API functions ****************************/
+static dma_cookie_t
+mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+       struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
+       struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
+       struct mv_xor_desc_slot *grp_start, *old_chain_tail;
+       dma_cookie_t cookie;
+       int new_hw_chain = 1;
+
+       dev_dbg(mv_chan->device->common.dev,
+               "%s sw_desc %p: async_tx %p\n",
+               __func__, sw_desc, &sw_desc->async_tx);
+
+       grp_start = sw_desc->group_head;
+
+       spin_lock_bh(&mv_chan->lock);
+       cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
+
+       if (list_empty(&mv_chan->chain))
+               list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
+       else {
+               new_hw_chain = 0;
+
+               old_chain_tail = list_entry(mv_chan->chain.prev,
+                                           struct mv_xor_desc_slot,
+                                           chain_node);
+               list_splice_init(&grp_start->async_tx.tx_list,
+                                &old_chain_tail->chain_node);
+
+               if (!mv_can_chain(grp_start))
+                       goto submit_done;
+
+               dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n",
+                       old_chain_tail->async_tx.phys);
+
+               /* fix up the hardware chain */
+               mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
+
+               /* if the channel is not busy */
+               if (!mv_chan_is_busy(mv_chan)) {
+                       u32 current_desc = mv_chan_get_current_desc(mv_chan);
+                       /*
+                        * and the current descriptor was the end of the chain
+                        * before the append, then we need to start the channel
+                        */
+                       if (current_desc == old_chain_tail->async_tx.phys)
+                               new_hw_chain = 1;
+               }
+       }
+
+       if (new_hw_chain)
+               mv_xor_start_new_chain(mv_chan, grp_start);
+
+submit_done:
+       spin_unlock_bh(&mv_chan->lock);
+
+       return cookie;
+}
+
+/* returns the number of allocated descriptors */
+static int mv_xor_alloc_chan_resources(struct dma_chan *chan,
+                                      struct dma_client *client)
+{
+       char *hw_desc;
+       int idx;
+       struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+       struct mv_xor_desc_slot *slot = NULL;
+       struct mv_xor_platform_data *plat_data =
+               mv_chan->device->pdev->dev.platform_data;
+       int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
+
+       /* Allocate descriptor slots */
+       idx = mv_chan->slots_allocated;
+       while (idx < num_descs_in_pool) {
+               slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+               if (!slot) {
+                       printk(KERN_INFO "MV XOR Channel only initialized"
+                               " %d descriptor slots\n", idx);
+                       break;
+               }
+               hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
+               slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+
+               dma_async_tx_descriptor_init(&slot->async_tx, chan);
+               slot->async_tx.tx_submit = mv_xor_tx_submit;
+               INIT_LIST_HEAD(&slot->chain_node);
+               INIT_LIST_HEAD(&slot->slot_node);
+               INIT_LIST_HEAD(&slot->async_tx.tx_list);
+               hw_desc = (char *) mv_chan->device->dma_desc_pool;
+               slot->async_tx.phys =
+                       (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+               slot->idx = idx++;
+
+               spin_lock_bh(&mv_chan->lock);
+               mv_chan->slots_allocated = idx;
+               list_add_tail(&slot->slot_node, &mv_chan->all_slots);
+               spin_unlock_bh(&mv_chan->lock);
+       }
+
+       if (mv_chan->slots_allocated && !mv_chan->last_used)
+               mv_chan->last_used = list_entry(mv_chan->all_slots.next,
+                                       struct mv_xor_desc_slot,
+                                       slot_node);
+
+       dev_dbg(mv_chan->device->common.dev,
+               "allocated %d descriptor slots last_used: %p\n",
+               mv_chan->slots_allocated, mv_chan->last_used);
+
+       return mv_chan->slots_allocated ? : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+               size_t len, unsigned long flags)
+{
+       struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+       struct mv_xor_desc_slot *sw_desc, *grp_start;
+       int slot_cnt;
+
+       dev_dbg(mv_chan->device->common.dev,
+               "%s dest: %x src %x len: %u flags: %ld\n",
+               __func__, dest, src, len, flags);
+       if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+               return NULL;
+
+       BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+       spin_lock_bh(&mv_chan->lock);
+       slot_cnt = mv_chan_memcpy_slot_count(len);
+       sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+       if (sw_desc) {
+               sw_desc->type = DMA_MEMCPY;
+               sw_desc->async_tx.flags = flags;
+               grp_start = sw_desc->group_head;
+               mv_desc_init(grp_start, flags);
+               mv_desc_set_byte_count(grp_start, len);
+               mv_desc_set_dest_addr(sw_desc->group_head, dest);
+               mv_desc_set_src_addr(grp_start, 0, src);
+               sw_desc->unmap_src_cnt = 1;
+               sw_desc->unmap_len = len;
+       }
+       spin_unlock_bh(&mv_chan->lock);
+
+       dev_dbg(mv_chan->device->common.dev,
+               "%s sw_desc %p async_tx %p\n",
+               __func__, sw_desc, sw_desc ? &sw_desc->async_tx : NULL);
+
+       return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+                      size_t len, unsigned long flags)
+{
+       struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+       struct mv_xor_desc_slot *sw_desc, *grp_start;
+       int slot_cnt;
+
+       dev_dbg(mv_chan->device->common.dev,
+               "%s dest: %x len: %u flags: %ld\n",
+               __func__, dest, len, flags);
+       if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+               return NULL;
+
+       BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+       spin_lock_bh(&mv_chan->lock);
+       slot_cnt = mv_chan_memset_slot_count(len);
+       sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+       if (sw_desc) {
+               sw_desc->type = DMA_MEMSET;
+               sw_desc->async_tx.flags = flags;
+               grp_start = sw_desc->group_head;
+               mv_desc_init(grp_start, flags);
+               mv_desc_set_byte_count(grp_start, len);
+               mv_desc_set_dest_addr(sw_desc->group_head, dest);
+               mv_desc_set_block_fill_val(grp_start, value);
+               sw_desc->unmap_src_cnt = 1;
+               sw_desc->unmap_len = len;
+       }
+       spin_unlock_bh(&mv_chan->lock);
+       dev_dbg(mv_chan->device->common.dev,
+               "%s sw_desc %p async_tx %p\n",
+               __func__, sw_desc, sw_desc ? &sw_desc->async_tx : NULL);
+       return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+                   unsigned int src_cnt, size_t len, unsigned long flags)
+{
+       struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+       struct mv_xor_desc_slot *sw_desc, *grp_start;
+       int slot_cnt;
+
+       if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+               return NULL;
+
+       BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+       dev_dbg(mv_chan->device->common.dev,
+               "%s src_cnt: %d len: %u dest %x flags: %ld\n",
+               __func__, src_cnt, len, dest, flags);
+
+       spin_lock_bh(&mv_chan->lock);
+       slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
+       sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+       if (sw_desc) {
+               sw_desc->type = DMA_XOR;
+               sw_desc->async_tx.flags = flags;
+               grp_start = sw_desc->group_head;
+               mv_desc_init(grp_start, flags);
+               /* the byte count field is the same as in the memcpy desc */
+               mv_desc_set_byte_count(grp_start, len);
+               mv_desc_set_dest_addr(sw_desc->group_head, dest);
+               sw_desc->unmap_src_cnt = src_cnt;
+               sw_desc->unmap_len = len;
+               while (src_cnt--)
+                       mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]);
+       }
+       spin_unlock_bh(&mv_chan->lock);
+       dev_dbg(mv_chan->device->common.dev,
+               "%s sw_desc %p async_tx %p\n",
+               __func__, sw_desc, sw_desc ? &sw_desc->async_tx : NULL);
+       return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void mv_xor_free_chan_resources(struct dma_chan *chan)
+{
+       struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+       struct mv_xor_desc_slot *iter, *_iter;
+       int in_use_descs = 0;
+
+       mv_xor_slot_cleanup(mv_chan);
+
+       spin_lock_bh(&mv_chan->lock);
+       list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+                                       chain_node) {
+               in_use_descs++;
+               list_del(&iter->chain_node);
+       }
+       list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+                                completed_node) {
+               in_use_descs++;
+               list_del(&iter->completed_node);
+       }
+       list_for_each_entry_safe_reverse(
+               iter, _iter, &mv_chan->all_slots, slot_node) {
+               list_del(&iter->slot_node);
+               kfree(iter);
+               mv_chan->slots_allocated--;
+       }
+       mv_chan->last_used = NULL;
+
+       dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
+               __func__, mv_chan->slots_allocated);
+       spin_unlock_bh(&mv_chan->lock);
+
+       if (in_use_descs)
+               dev_err(mv_chan->device->common.dev,
+                       "freeing %d in use descriptors!\n", in_use_descs);
+}
+
+/**
+ * mv_xor_is_complete - poll the status of an XOR transaction
+ * @chan: XOR channel handle
+ * @cookie: XOR transaction identifier
+ */
+static enum dma_status mv_xor_is_complete(struct dma_chan *chan,
+                                         dma_cookie_t cookie,
+                                         dma_cookie_t *done,
+                                         dma_cookie_t *used)
+{
+       struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+       dma_cookie_t last_used;
+       dma_cookie_t last_complete;
+       enum dma_status ret;
+
+       last_used = chan->cookie;
+       last_complete = mv_chan->completed_cookie;
+       mv_chan->is_complete_cookie = cookie;
+       if (done)
+               *done = last_complete;
+       if (used)
+               *used = last_used;
+
+       ret = dma_async_is_complete(cookie, last_complete, last_used);
+       if (ret == DMA_SUCCESS) {
+               mv_xor_clean_completed_slots(mv_chan);
+               return ret;
+       }
+       mv_xor_slot_cleanup(mv_chan);
+
+       last_used = chan->cookie;
+       last_complete = mv_chan->completed_cookie;
+
+       if (done)
+               *done = last_complete;
+       if (used)
+               *used = last_used;
+
+       return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void mv_dump_xor_regs(struct mv_xor_chan *chan)
+{
+       u32 val;
+
+       val = __raw_readl(XOR_CONFIG(chan));
+       dev_printk(KERN_ERR, chan->device->common.dev,
+                  "config       0x%08x.\n", val);
+
+       val = __raw_readl(XOR_ACTIVATION(chan));
+       dev_printk(KERN_ERR, chan->device->common.dev,
+                  "activation   0x%08x.\n", val);
+
+       val = __raw_readl(XOR_INTR_CAUSE(chan));
+       dev_printk(KERN_ERR, chan->device->common.dev,
+                  "intr cause   0x%08x.\n", val);
+
+       val = __raw_readl(XOR_INTR_MASK(chan));
+       dev_printk(KERN_ERR, chan->device->common.dev,
+                  "intr mask    0x%08x.\n", val);
+
+       val = __raw_readl(XOR_ERROR_CAUSE(chan));
+       dev_printk(KERN_ERR, chan->device->common.dev,
+                  "error cause  0x%08x.\n", val);
+
+       val = __raw_readl(XOR_ERROR_ADDR(chan));
+       dev_printk(KERN_ERR, chan->device->common.dev,
+                  "error addr   0x%08x.\n", val);
+}
+
+static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
+                                        u32 intr_cause)
+{
+       if (intr_cause & (1 << 4)) {
+               dev_dbg(chan->device->common.dev,
+                       "ignore this error\n");
+               return;
+       }
+
+       dev_printk(KERN_ERR, chan->device->common.dev,
+                  "error on chan %d. intr cause 0x%08x.\n",
+                  chan->idx, intr_cause);
+
+       mv_dump_xor_regs(chan);
+       BUG();
+}
+
+static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
+{
+       struct mv_xor_chan *chan = data;
+       u32 intr_cause = mv_chan_get_intr_cause(chan);
+
+       dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause);
+
+       if (mv_is_err_intr(intr_cause))
+               mv_xor_err_interrupt_handler(chan, intr_cause);
+
+       tasklet_schedule(&chan->irq_tasklet);
+
+       mv_xor_device_clear_eoc_cause(chan);
+
+       return IRQ_HANDLED;
+}
+
+static void mv_xor_issue_pending(struct dma_chan *chan)
+{
+       struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+
+       if (mv_chan->pending >= MV_XOR_THRESHOLD) {
+               mv_chan->pending = 0;
+               mv_chan_activate(mv_chan);
+       }
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define MV_XOR_TEST_SIZE 2000
+
+static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
+{
+       int i;
+       void *src, *dest;
+       dma_addr_t src_dma, dest_dma;
+       struct dma_chan *dma_chan;
+       dma_cookie_t cookie;
+       struct dma_async_tx_descriptor *tx;
+       int err = 0;
+       struct mv_xor_chan *mv_chan;
+
+       src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+       if (!src)
+               return -ENOMEM;
+
+       dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+       if (!dest) {
+               kfree(src);
+               return -ENOMEM;
+       }
+
+       /* Fill in src buffer */
+       for (i = 0; i < MV_XOR_TEST_SIZE; i++)
+               ((u8 *) src)[i] = (u8)i;
+
+       /* Start copy, using first DMA channel */
+       dma_chan = container_of(device->common.channels.next,
+                               struct dma_chan,
+                               device_node);
+       if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) {
+               err = -ENODEV;
+               goto out;
+       }
+
+       dest_dma = dma_map_single(dma_chan->device->dev, dest,
+                                 MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+
+       src_dma = dma_map_single(dma_chan->device->dev, src,
+                                MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
+
+       tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+                                   MV_XOR_TEST_SIZE, 0);
+       cookie = mv_xor_tx_submit(tx);
+       mv_xor_issue_pending(dma_chan);
+       async_tx_ack(tx);
+       msleep(1);
+
+       if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+           DMA_SUCCESS) {
+               dev_printk(KERN_ERR, dma_chan->device->dev,
+                          "Self-test copy timed out, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       mv_chan = to_mv_xor_chan(dma_chan);
+       dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+                               MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+       if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
+               dev_printk(KERN_ERR, dma_chan->device->dev,
+                          "Self-test copy failed compare, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+free_resources:
+       mv_xor_free_chan_resources(dma_chan);
+out:
+       kfree(src);
+       kfree(dest);
+       return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+mv_xor_xor_self_test(struct mv_xor_device *device)
+{
+       int i, src_idx;
+       struct page *dest;
+       struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+       dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+       dma_addr_t dest_dma;
+       struct dma_async_tx_descriptor *tx;
+       struct dma_chan *dma_chan;
+       dma_cookie_t cookie;
+       u8 cmp_byte = 0;
+       u32 cmp_word;
+       int err = 0;
+       struct mv_xor_chan *mv_chan;
+
+       for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+               xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+               if (!xor_srcs[src_idx]) {
+                       while (src_idx--)
+                               __free_page(xor_srcs[src_idx]);
+                       return -ENOMEM;
+               }
+       }
+
+       dest = alloc_page(GFP_KERNEL);
+       if (!dest) {
+               while (src_idx--)
+                       __free_page(xor_srcs[src_idx]);
+               return -ENOMEM;
+       }
+
+       /* Fill in src buffers */
+       for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+               u8 *ptr = page_address(xor_srcs[src_idx]);
+               for (i = 0; i < PAGE_SIZE; i++)
+                       ptr[i] = (1 << src_idx);
+       }
+
+       for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
+               cmp_byte ^= (u8) (1 << src_idx);
+
+       cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+               (cmp_byte << 8) | cmp_byte;
+
+       memset(page_address(dest), 0, PAGE_SIZE);
+
+       dma_chan = container_of(device->common.channels.next,
+                               struct dma_chan,
+                               device_node);
+       if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) {
+               err = -ENODEV;
+               goto out;
+       }
+
+       /* test xor */
+       dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+                               DMA_FROM_DEVICE);
+
+       for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
+               dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+                                          0, PAGE_SIZE, DMA_TO_DEVICE);
+
+       tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+                                MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
+
+       cookie = mv_xor_tx_submit(tx);
+       mv_xor_issue_pending(dma_chan);
+       async_tx_ack(tx);
+       msleep(8);
+
+       if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+           DMA_SUCCESS) {
+               dev_printk(KERN_ERR, dma_chan->device->dev,
+                          "Self-test xor timed out, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       mv_chan = to_mv_xor_chan(dma_chan);
+       dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+                               PAGE_SIZE, DMA_FROM_DEVICE);
+       for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+               u32 *ptr = page_address(dest);
+               if (ptr[i] != cmp_word) {
+                       dev_printk(KERN_ERR, dma_chan->device->dev,
+                                  "Self-test xor failed compare, disabling."
+                                  " index %d, data %x, expected %x\n", i,
+                                  ptr[i], cmp_word);
+                       err = -ENODEV;
+                       goto free_resources;
+               }
+       }
+
+free_resources:
+       mv_xor_free_chan_resources(dma_chan);
+out:
+       src_idx = MV_XOR_NUM_SRC_TEST;
+       while (src_idx--)
+               __free_page(xor_srcs[src_idx]);
+       __free_page(dest);
+       return err;
+}
+
+static int __devexit mv_xor_remove(struct platform_device *dev)
+{
+       struct mv_xor_device *device = platform_get_drvdata(dev);
+       struct dma_chan *chan, *_chan;
+       struct mv_xor_chan *mv_chan;
+       struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
+
+       dma_async_device_unregister(&device->common);
+
+       dma_free_coherent(&dev->dev, plat_data->pool_size,
+                       device->dma_desc_pool_virt, device->dma_desc_pool);
+
+       list_for_each_entry_safe(chan, _chan, &device->common.channels,
+                               device_node) {
+               mv_chan = to_mv_xor_chan(chan);
+               list_del(&chan->device_node);
+       }
+
+       return 0;
+}
+
+static int __devinit mv_xor_probe(struct platform_device *pdev)
+{
+       int ret = 0;
+       int irq;
+       struct mv_xor_device *adev;
+       struct mv_xor_chan *mv_chan;
+       struct dma_device *dma_dev;
+       struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
+
+
+       adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
+       if (!adev)
+               return -ENOMEM;
+
+       dma_dev = &adev->common;
+
+       /* allocate coherent memory for hardware descriptors
+        * note: writecombine gives slightly better performance, but
+        * requires that we explicitly flush the writes
+        */
+       adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+                                                         plat_data->pool_size,
+                                                         &adev->dma_desc_pool,
+                                                         GFP_KERNEL);
+       if (!adev->dma_desc_pool_virt)
+               return -ENOMEM;
+
+       adev->id = plat_data->hw_id;
+
+       /* discover transaction capabilities from the platform data */
+       dma_dev->cap_mask = plat_data->cap_mask;
+       adev->pdev = pdev;
+       platform_set_drvdata(pdev, adev);
+
+       adev->shared = platform_get_drvdata(plat_data->shared);
+
+       INIT_LIST_HEAD(&dma_dev->channels);
+
+       /* set base routines */
+       dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
+       dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
+       dma_dev->device_is_tx_complete = mv_xor_is_complete;
+       dma_dev->device_issue_pending = mv_xor_issue_pending;
+       dma_dev->dev = &pdev->dev;
+
+       /* set prep routines based on capability */
+       if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+               dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
+       if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+               dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset;
+       if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+               dma_dev->max_xor = 8;
+               dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
+       }
+
+       mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
+       if (!mv_chan) {
+               ret = -ENOMEM;
+               goto err_free_dma;
+       }
+       mv_chan->device = adev;
+       mv_chan->idx = plat_data->hw_id;
+       mv_chan->mmr_base = adev->shared->xor_base;
+
+       if (!mv_chan->mmr_base) {
+               ret = -ENOMEM;
+               goto err_free_dma;
+       }
+       tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
+                    mv_chan);
+
+       /* clear errors before enabling interrupts */
+       mv_xor_device_clear_err_status(mv_chan);
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0) {
+               ret = irq;
+               goto err_free_dma;
+       }
+       ret = devm_request_irq(&pdev->dev, irq,
+                              mv_xor_interrupt_handler,
+                              0, dev_name(&pdev->dev), mv_chan);
+       if (ret)
+               goto err_free_dma;
+
+       mv_chan_unmask_interrupts(mv_chan);
+
+       mv_set_mode(mv_chan, DMA_MEMCPY);
+
+       spin_lock_init(&mv_chan->lock);
+       INIT_LIST_HEAD(&mv_chan->chain);
+       INIT_LIST_HEAD(&mv_chan->completed_slots);
+       INIT_LIST_HEAD(&mv_chan->all_slots);
+       INIT_RCU_HEAD(&mv_chan->common.rcu);
+       mv_chan->common.device = dma_dev;
+
+       list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
+
+       if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+               ret = mv_xor_memcpy_self_test(adev);
+               dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+               if (ret)
+                       goto err_free_dma;
+       }
+
+       if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+               ret = mv_xor_xor_self_test(adev);
+               dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+               if (ret)
+                       goto err_free_dma;
+       }
+
+       dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
+         "( %s%s%s%s)\n",
+         dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+         dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)  ? "fill " : "",
+         dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+         dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+       dma_async_device_register(dma_dev);
+       goto out;
+
+ err_free_dma:
+       dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+                       adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ out:
+       return ret;
+}
+
+static void
+mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
+                        struct mbus_dram_target_info *dram)
+{
+       void __iomem *base = msp->xor_base;
+       u32 win_enable = 0;
+       int i;
+
+       for (i = 0; i < 8; i++) {
+               writel(0, base + WINDOW_BASE(i));
+               writel(0, base + WINDOW_SIZE(i));
+               if (i < 4)
+                       writel(0, base + WINDOW_REMAP_HIGH(i));
+       }
+
+       for (i = 0; i < dram->num_cs; i++) {
+               struct mbus_dram_window *cs = dram->cs + i;
+
+               writel((cs->base & 0xffff0000) |
+                      (cs->mbus_attr << 8) |
+                      dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+               writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+               win_enable |= (1 << i);
+               win_enable |= 3 << (16 + (2 * i));
+       }
+
+       writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+       writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+}
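+/* Worked example (editorial sketch, assumed values only): for a hypothetical
+ * 256 MB chip select at base 0x00000000 with mbus_attr 0x0e and target id 0,
+ * the loop above would program window 0 as
+ *
+ *     WINDOW_BASE(0) = (0x00000000 & 0xffff0000) | (0x0e << 8) | 0 = 0x00000e00
+ *     WINDOW_SIZE(0) = (0x10000000 - 1) & 0xffff0000              = 0x0fff0000
+ *     win_enable    |= (1 << 0) | (3 << 16)                       = 0x00030001
+ */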
+
+static struct platform_driver mv_xor_driver = {
+       .probe          = mv_xor_probe,
+       .remove         = mv_xor_remove,
+       .driver         = {
+               .owner  = THIS_MODULE,
+               .name   = MV_XOR_NAME,
+       },
+};
+
+static int mv_xor_shared_probe(struct platform_device *pdev)
+{
+       struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data;
+       struct mv_xor_shared_private *msp;
+       struct resource *res;
+
+       dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n");
+
+       msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
+       if (!msp)
+               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENODEV;
+
+       msp->xor_base = devm_ioremap(&pdev->dev, res->start,
+                                    res->end - res->start + 1);
+       if (!msp->xor_base)
+               return -EBUSY;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       if (!res)
+               return -ENODEV;
+
+       msp->xor_high_base = devm_ioremap(&pdev->dev, res->start,
+                                         res->end - res->start + 1);
+       if (!msp->xor_high_base)
+               return -EBUSY;
+
+       platform_set_drvdata(pdev, msp);
+
+       /*
+        * (Re-)program MBUS remapping windows if we are asked to.
+        */
+       if (msd != NULL && msd->dram != NULL)
+               mv_xor_conf_mbus_windows(msp, msd->dram);
+
+       return 0;
+}
+
+static int mv_xor_shared_remove(struct platform_device *pdev)
+{
+       return 0;
+}
+
+static struct platform_driver mv_xor_shared_driver = {
+       .probe          = mv_xor_shared_probe,
+       .remove         = mv_xor_shared_remove,
+       .driver         = {
+               .owner  = THIS_MODULE,
+               .name   = MV_XOR_SHARED_NAME,
+       },
+};
+
+
+static int __init mv_xor_init(void)
+{
+       int rc;
+
+       rc = platform_driver_register(&mv_xor_shared_driver);
+       if (!rc) {
+               rc = platform_driver_register(&mv_xor_driver);
+               if (rc)
+                       platform_driver_unregister(&mv_xor_shared_driver);
+       }
+       return rc;
+}
+module_init(mv_xor_init);
+
+/* it's currently unsafe to unload this module */
+#if 0
+static void __exit mv_xor_exit(void)
+{
+       platform_driver_unregister(&mv_xor_driver);
+       platform_driver_unregister(&mv_xor_shared_driver);
+       return;
+}
+
+module_exit(mv_xor_exit);
+#endif
+
+MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
+MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
new file mode 100644 (file)
index 0000000..06cafe1
--- /dev/null
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MV_XOR_H
+#define MV_XOR_H
+
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#define USE_TIMER
+#define MV_XOR_SLOT_SIZE               64
+#define MV_XOR_THRESHOLD               1
+
+#define XOR_OPERATION_MODE_XOR         0
+#define XOR_OPERATION_MODE_MEMCPY      2
+#define XOR_OPERATION_MODE_MEMSET      4
+
+#define XOR_CURR_DESC(chan)    (chan->mmr_base + 0x210 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan)    (chan->mmr_base + 0x200 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan)   (chan->mmr_base + 0x220 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan)   (chan->mmr_base + 0x2C0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan)       (chan->mmr_base + 0x2E0)
+#define XOR_INIT_VALUE_HIGH(chan)      (chan->mmr_base + 0x2E4)
+
+#define XOR_CONFIG(chan)       (chan->mmr_base + 0x10 + (chan->idx * 4))
+#define XOR_ACTIVATION(chan)   (chan->mmr_base + 0x20 + (chan->idx * 4))
+#define XOR_INTR_CAUSE(chan)   (chan->mmr_base + 0x30)
+#define XOR_INTR_MASK(chan)    (chan->mmr_base + 0x40)
+#define XOR_ERROR_CAUSE(chan)  (chan->mmr_base + 0x50)
+#define XOR_ERROR_ADDR(chan)   (chan->mmr_base + 0x60)
+#define XOR_INTR_MASK_VALUE    0x3F5
+
+#define WINDOW_BASE(w)         (0x250 + ((w) << 2))
+#define WINDOW_SIZE(w)         (0x270 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w)   (0x290 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan)        (0x240 + ((chan) << 2))
+
+struct mv_xor_shared_private {
+       void __iomem    *xor_base;
+       void __iomem    *xor_high_base;
+};
+
+
+/**
+ * struct mv_xor_device - internal representation of a XOR device
+ * @pdev: Platform device
+ * @id: HW XOR Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ * @shared: register state shared with the other channels on this XOR unit
+ */
+struct mv_xor_device {
+       struct platform_device          *pdev;
+       int                             id;
+       dma_addr_t                      dma_desc_pool;
+       void                            *dma_desc_pool_virt;
+       struct dma_device               common;
+       struct mv_xor_shared_private    *shared;
+};
+
+/**
+ * struct mv_xor_chan - internal representation of a XOR channel
+ * @pending: allows batching of hardware operations
+ * @completed_cookie: identifier for the most recently completed operation
+ * @lock: serializes enqueue/dequeue operations to the descriptors pool
+ * @mmr_base: memory mapped register base
+ * @idx: the index of the xor channel
+ * @chain: device chain view of the descriptors
+ * @completed_slots: slots completed by HW but still need to be acked
+ * @device: parent device
+ * @common: common dmaengine channel object members
+ * @last_used: placeholder so allocation can continue from where it left off
+ * @all_slots: complete domain of slots usable by the channel
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
+ */
+struct mv_xor_chan {
+       int                     pending;
+       dma_cookie_t            completed_cookie;
+       spinlock_t              lock; /* protects the descriptor slot pool */
+       void __iomem            *mmr_base;
+       unsigned int            idx;
+       enum dma_transaction_type       current_type;
+       struct list_head        chain;
+       struct list_head        completed_slots;
+       struct mv_xor_device    *device;
+       struct dma_chan         common;
+       struct mv_xor_desc_slot *last_used;
+       struct list_head        all_slots;
+       int                     slots_allocated;
+       struct tasklet_struct   irq_tasklet;
+#ifdef USE_TIMER
+       unsigned long           cleanup_time;
+       u32                     current_on_last_cleanup;
+       dma_cookie_t            is_complete_cookie;
+#endif
+};
+
+/**
+ * struct mv_xor_desc_slot - software descriptor
+ * @slot_node: node on the mv_xor_chan.all_slots list
+ * @chain_node: node on the mv_xor_chan.chain list
+ * @completed_node: node on the mv_xor_chan.completed_slots list
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @slot_cnt: total slots used in a transaction (group of operations)
+ * @slots_per_op: number of slots per operation
+ * @idx: pool index
+ * @unmap_src_cnt: number of xor sources
+ * @unmap_len: transaction bytecount
+ * @async_tx: support for the async_tx api
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ *     for example transfer lengths larger than the supported hw max
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result of the crc calculation
+ */
+struct mv_xor_desc_slot {
+       struct list_head        slot_node;
+       struct list_head        chain_node;
+       struct list_head        completed_node;
+       enum dma_transaction_type       type;
+       void                    *hw_desc;
+       struct mv_xor_desc_slot *group_head;
+       u16                     slot_cnt;
+       u16                     slots_per_op;
+       u16                     idx;
+       u16                     unmap_src_cnt;
+       u32                     value;
+       size_t                  unmap_len;
+       struct dma_async_tx_descriptor  async_tx;
+       union {
+               u32             *xor_check_result;
+               u32             *crc32_result;
+       };
+#ifdef USE_TIMER
+       unsigned long           arrival_time;
+       struct timer_list       timeout;
+#endif
+};
+
+/* This structure describes a 64-byte XOR hardware descriptor */
+struct mv_xor_desc {
+       u32 status;             /* descriptor execution status */
+       u32 crc32_result;       /* result of CRC-32 calculation */
+       u32 desc_command;       /* type of operation to be carried out */
+       u32 phy_next_desc;      /* next descriptor address pointer */
+       u32 byte_count;         /* size of src/dst blocks in bytes */
+       u32 phy_dest_addr;      /* destination block address */
+       u32 phy_src_addr[8];    /* source block addresses */
+       u32 reserved0;
+       u32 reserved1;
+};
+
+#define to_mv_sw_desc(addr_hw_desc)            \
+       container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
+
+#define mv_hw_desc_slot_idx(hw_desc, idx)      \
+       ((void *)(((unsigned long)hw_desc) + ((idx) << 5)))
+
+#define MV_XOR_MIN_BYTE_COUNT  (128)
+#define XOR_MAX_BYTE_COUNT     ((16 * 1024 * 1024) - 1)
+#define MV_XOR_MAX_BYTE_COUNT  XOR_MAX_BYTE_COUNT
+
+
+#endif
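One way to sanity-check the layout above, since the slot pool in mv_xor.c carves descriptors out in MV_XOR_SLOT_SIZE chunks: the hardware descriptor (six control words, eight source pointers, two reserved words, all u32) must be exactly 64 bytes. A minimal sketch, not part of the driver:

/* sketch: compile-time check that the hw descriptor fits one 64-byte slot */
#include <linux/kernel.h>

static inline void mv_xor_desc_layout_check(void)
{
	BUILD_BUG_ON(sizeof(struct mv_xor_desc) != MV_XOR_SLOT_SIZE);
}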
index 90d14ee564f567c2515dbced773eebda2d494063..ef4f5da2029f30af6e493e4ff9082d34abd0c40c 100644 (file)
@@ -198,17 +198,13 @@ iop_chan_memset_slot_count(size_t len, int *slots_per_op)
 static inline int
 iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
 {
-       int num_slots;
-       /* slots_to_find = 1 for basic descriptor + 1 per 4 sources above 1
-        * (1 source => 8 bytes) (1 slot => 32 bytes)
-        */
-       num_slots = 1 + (((src_cnt - 1) << 3) >> 5);
-       if (((src_cnt - 1) << 3) & 0x1f)
-               num_slots++;
-
-       *slots_per_op = num_slots;
-
-       return num_slots;
+       static const char slot_count_table[] = { 1, 2, 2, 2,
+                                                2, 3, 3, 3,
+                                                3, 4, 4, 4,
+                                                4, 5, 5, 5,
+                                               };
+       *slots_per_op = slot_count_table[src_cnt - 1];
+       return *slots_per_op;
 }
 
 #define ADMA_MAX_BYTE_COUNT    (16 * 1024 * 1024)
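The lookup table above is a precomputed form of the arithmetic it replaces: one extra 32-byte slot per four 8-byte source pointers beyond the first. A standalone sketch (plain userspace C, not kernel code) that checks the two agree for src_cnt 1..16:

#include <assert.h>

int main(void)
{
	static const char slot_count_table[] = { 1, 2, 2, 2,
						 2, 3, 3, 3,
						 3, 4, 4, 4,
						 4, 5, 5, 5, };
	int src_cnt;

	for (src_cnt = 1; src_cnt <= 16; src_cnt++) {
		/* old computation: 1 basic slot + 1 per 4 sources above the first */
		int num_slots = 1 + (((src_cnt - 1) << 3) >> 5);

		if (((src_cnt - 1) << 3) & 0x1f)
			num_slots++;
		assert(num_slots == slot_count_table[src_cnt - 1]);
	}
	return 0;
}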
index a32b86ac62aa4f4e325dd7c035270a88b072c392..af64676650a22b7f6817d0d809d4fc6d82d8966d 100644 (file)
@@ -260,7 +260,7 @@ static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op)
 static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
                                        int *slots_per_op)
 {
-       static const int slot_count_table[] = { 0,
+       static const char slot_count_table[] = {
                                                1, 1, 1, 1, /* 01 - 04 */
                                                2, 2, 2, 2, /* 05 - 08 */
                                                4, 4, 4, 4, /* 09 - 12 */
@@ -270,7 +270,7 @@ static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
                                                8, 8, 8, 8, /* 25 - 28 */
                                                8, 8, 8, 8, /* 29 - 32 */
                                              };
-       *slots_per_op = slot_count_table[src_cnt];
+       *slots_per_op = slot_count_table[src_cnt - 1];
        return *slots_per_op;
 }
 
diff --git a/include/asm-arm/plat-orion/mv_xor.h b/include/asm-arm/plat-orion/mv_xor.h
new file mode 100644 (file)
index 0000000..c349e8f
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Marvell XOR platform device data definition file.
+ */
+
+#ifndef __ASM_PLAT_ORION_MV_XOR_H
+#define __ASM_PLAT_ORION_MV_XOR_H
+
+#include <linux/dmaengine.h>
+#include <linux/mbus.h>
+
+#define MV_XOR_SHARED_NAME     "mv_xor_shared"
+#define MV_XOR_NAME            "mv_xor"
+
+struct mbus_dram_target_info;
+
+struct mv_xor_platform_shared_data {
+       struct mbus_dram_target_info    *dram;
+};
+
+struct mv_xor_platform_data {
+       struct platform_device          *shared;
+       int                             hw_id;
+       dma_cap_mask_t                  cap_mask;
+       size_t                          pool_size;
+};
+
+
+#endif
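To show how these two structs are meant to be wired together, here is a minimal board-file sketch; every name, the resources, and the dram pointer are invented placeholders, and the real Orion/Kirkwood setup code will differ:

/* hypothetical board code -- resources and mbus info omitted/assumed */
static struct mv_xor_platform_shared_data board_xor_shared_data = {
	.dram	= &board_mbus_dram_info,	/* assumed mbus_dram_target_info */
};

static struct platform_device board_xor_shared = {
	.name	= MV_XOR_SHARED_NAME,
	.id	= 0,
	.dev	= { .platform_data = &board_xor_shared_data, },
	/* plus two IORESOURCE_MEM entries: xor_base and xor_high_base */
};

static struct mv_xor_platform_data board_xor0_data = {
	.shared		= &board_xor_shared,
	.hw_id		= 0,
	.pool_size	= PAGE_SIZE,
};

static struct platform_device board_xor0 = {
	.name	= MV_XOR_NAME,
	.id	= 0,
	.dev	= { .platform_data = &board_xor0_data, },
	/* plus one IORESOURCE_IRQ entry for the channel interrupt */
};

static void __init board_xor_init(void)
{
	dma_cap_set(DMA_MEMCPY, board_xor0_data.cap_mask);
	dma_cap_set(DMA_XOR, board_xor0_data.cap_mask);
	platform_device_register(&board_xor_shared);
	platform_device_register(&board_xor0);
}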
index 31e48b0e732414aff872d901dd4c0f33a5f5b54d..d18a3053be0d33b909ce665b41aaa62211e37a0a 100644 (file)
 #define GPIO_PIN_PD(N) (GPIO_PIOD_BASE + (N))
 #define GPIO_PIN_PE(N) (GPIO_PIOE_BASE + (N))
 
+
+/*
+ * DMAC peripheral hardware handshaking interfaces, used with dw_dmac
+ */
+#define DMAC_MCI_RX            0
+#define DMAC_MCI_TX            1
+#define DMAC_DAC_TX            2
+#define DMAC_AC97_A_RX         3
+#define DMAC_AC97_A_TX         4
+#define DMAC_AC97_B_RX         5
+#define DMAC_AC97_B_TX         6
+#define DMAC_DMAREQ_0          7
+#define DMAC_DMAREQ_1          8
+#define DMAC_DMAREQ_2          9
+#define DMAC_DMAREQ_3          10
+
 #endif /* __ASM_ARCH_AT32AP700X_H__ */
index eb640f0acfacd629ee179c4b10e639d56a04b93a..0f50d4cc4360e65d3453704aa400448f99cb95a6 100644 (file)
@@ -101,21 +101,14 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
 
 /**
  * async_tx_sync_epilog - actions to take if an operation is run synchronously
- * @flags: async_tx flags
- * @depend_tx: transaction depends on depend_tx
  * @cb_fn: function to call when the transaction completes
  * @cb_fn_param: parameter to pass to the callback routine
  */
 static inline void
-async_tx_sync_epilog(unsigned long flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param)
+async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param)
 {
        if (cb_fn)
                cb_fn(cb_fn_param);
-
-       if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
-               async_tx_ack(depend_tx);
 }
 
 void
@@ -152,4 +145,6 @@ struct dma_async_tx_descriptor *
 async_trigger_callback(enum async_tx_flags flags,
        struct dma_async_tx_descriptor *depend_tx,
        dma_async_tx_callback cb_fn, void *cb_fn_param);
+
+void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
 #endif /* _ASYNC_TX_H_ */
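The newly exported async_tx_quiesce() pairs with the slimmed-down async_tx_sync_epilog(): when an operation falls back to the CPU it first waits out its dependency, does the work synchronously, then fires the completion callback. A rough sketch of that pattern, with the buffers and callback as placeholders:

/* sketch of a synchronous fallback path in an async_tx-style operation */
static void do_sync_fallback(void *dest_buf, void *src_buf, size_t len,
			     struct dma_async_tx_descriptor *depend_tx,
			     dma_async_tx_callback cb_fn, void *cb_fn_param)
{
	async_tx_quiesce(&depend_tx);		/* wait for the dependency, if any */
	memcpy(dest_buf, src_buf, len);		/* do the operation on the CPU */
	async_tx_sync_epilog(cb_fn, cb_fn_param);	/* run the completion callback */
}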
index af61cd1f37e96c1e019fd812c45a938de03c3944..b00a753eda53e6af1e50e7d892defdcb402913d8 100644 (file)
@@ -10,6 +10,7 @@ void dca_unregister_notify(struct notifier_block *nb);
 #define DCA_PROVIDER_REMOVE  0x0002
 
 struct dca_provider {
+       struct list_head        node;
        struct dca_ops          *ops;
        struct device           *cd;
        int                      id;
@@ -18,7 +19,9 @@ struct dca_provider {
 struct dca_ops {
        int     (*add_requester)    (struct dca_provider *, struct device *);
        int     (*remove_requester) (struct dca_provider *, struct device *);
-       u8      (*get_tag)          (struct dca_provider *, int cpu);
+       u8      (*get_tag)          (struct dca_provider *, struct device *,
+                                    int cpu);
+       int     (*dev_managed)      (struct dca_provider *, struct device *);
 };
 
 struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
@@ -32,9 +35,11 @@ static inline void *dca_priv(struct dca_provider *dca)
 }
 
 /* Requester API */
+#define DCA_GET_TAG_TWO_ARGS
 int dca_add_requester(struct device *dev);
 int dca_remove_requester(struct device *dev);
 u8 dca_get_tag(int cpu);
+u8 dca3_get_tag(struct device *dev, int cpu);
 
 /* internal stuff */
 int __init dca_sysfs_init(void);
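With the new ops->get_tag() signature the provider can check which device is asking; DCA_GET_TAG_TWO_ARGS appears to exist so requester drivers can pick the matching call at compile time. A hedged requester-side sketch (pdev and cpu are placeholders):

/* hypothetical requester-side tag lookup */
static u8 my_get_dca_tag(struct pci_dev *pdev, int cpu)
{
#ifdef DCA_GET_TAG_TWO_ARGS
	return dca3_get_tag(&pdev->dev, cpu);	/* DCA3: provider sees the device */
#else
	return dca_get_tag(cpu);		/* legacy single-argument form */
#endif
}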
index d08a5c5eb928f3f2d8ef8639a518b0ac37fb6c89..adb0b084eb5a413c601d4a8489e006e061906840 100644 (file)
@@ -89,10 +89,23 @@ enum dma_transaction_type {
        DMA_MEMSET,
        DMA_MEMCPY_CRC32C,
        DMA_INTERRUPT,
+       DMA_SLAVE,
 };
 
 /* last transaction type for creation of the capabilities mask */
-#define DMA_TX_TYPE_END (DMA_INTERRUPT + 1)
+#define DMA_TX_TYPE_END (DMA_SLAVE + 1)
+
+/**
+ * enum dma_slave_width - DMA slave register access width.
+ * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses
+ * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses
+ * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses
+ */
+enum dma_slave_width {
+       DMA_SLAVE_WIDTH_8BIT,
+       DMA_SLAVE_WIDTH_16BIT,
+       DMA_SLAVE_WIDTH_32BIT,
+};
 
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
@@ -102,10 +115,14 @@ enum dma_transaction_type {
  * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
  *     acknowledges receipt, i.e. has had a chance to establish any
  *     dependency chains
+ * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
+ * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
  */
 enum dma_ctrl_flags {
        DMA_PREP_INTERRUPT = (1 << 0),
        DMA_CTRL_ACK = (1 << 1),
+       DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
+       DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
 };
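/* Editorial sketch (placeholders only): a client that keeps its buffers mapped
 * across several operations passes the new flags at prep time so the driver's
 * completion path skips the automatic unmap:
 *
 *	flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP;
 *	tx = chan->device->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
 */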
 
 /**
@@ -114,6 +131,32 @@ enum dma_ctrl_flags {
  */
 typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
 
+/**
+ * struct dma_slave - Information about a DMA slave
+ * @dev: device acting as DMA slave
+ * @dma_dev: required DMA master device. If non-NULL, the client cannot be
+ *     bound to any master other than this one.
+ * @tx_reg: physical address of data register used for
+ *     memory-to-peripheral transfers
+ * @rx_reg: physical address of data register used for
+ *     peripheral-to-memory transfers
+ * @reg_width: peripheral register width
+ *
+ * If dma_dev is non-NULL, the client cannot be bound to any DMA
+ * master other than the one corresponding to this device. The DMA
+ * master driver may use this to determine whether there is
+ * controller-specific data wrapped around this struct. Platform code
+ * that sets the dma_dev field must therefore wrap this struct in an
+ * appropriate controller-specific dma slave structure.
+ */
+struct dma_slave {
+       struct device           *dev;
+       struct device           *dma_dev;
+       dma_addr_t              tx_reg;
+       dma_addr_t              rx_reg;
+       enum dma_slave_width    reg_width;
+};
+
 /**
  * struct dma_chan_percpu - the per-CPU part of struct dma_chan
  * @refcount: local_t used for open-coded "bigref" counting
@@ -139,6 +182,7 @@ struct dma_chan_percpu {
  * @rcu: the DMA channel's RCU head
  * @device_node: used to add this to the device chan list
  * @local: per-cpu pointer to a struct dma_chan_percpu
+ * @client_count: how many clients are using this channel
  */
 struct dma_chan {
        struct dma_device *device;
@@ -154,6 +198,7 @@ struct dma_chan {
 
        struct list_head device_node;
        struct dma_chan_percpu *local;
+       int client_count;
 };
 
 #define to_dma_chan(p) container_of(p, struct dma_chan, dev)
@@ -202,11 +247,14 @@ typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client,
  * @event_callback: func ptr to call when something happens
  * @cap_mask: only return channels that satisfy the requested capabilities
  *  a value of zero corresponds to any capability
+ * @slave: data for preparing slave transfer. Must be non-NULL iff the
+ *  DMA_SLAVE capability is requested.
  * @global_node: list_head for global dma_client_list
  */
 struct dma_client {
        dma_event_callback      event_callback;
        dma_cap_mask_t          cap_mask;
+       struct dma_slave        *slave;
        struct list_head        global_node;
 };
 
@@ -263,6 +311,8 @@ struct dma_async_tx_descriptor {
  * @device_prep_dma_zero_sum: prepares a zero_sum operation
  * @device_prep_dma_memset: prepares a memset operation
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
+ * @device_prep_slave_sg: prepares a slave dma operation
+ * @device_terminate_all: terminate all pending operations
  * @device_issue_pending: push pending transactions to hardware
  */
 struct dma_device {
@@ -279,7 +329,8 @@ struct dma_device {
        int dev_id;
        struct device *dev;
 
-       int (*device_alloc_chan_resources)(struct dma_chan *chan);
+       int (*device_alloc_chan_resources)(struct dma_chan *chan,
+                       struct dma_client *client);
        void (*device_free_chan_resources)(struct dma_chan *chan);
 
        struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
@@ -297,6 +348,12 @@ struct dma_device {
        struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
                struct dma_chan *chan, unsigned long flags);
 
+       struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
+               struct dma_chan *chan, struct scatterlist *sgl,
+               unsigned int sg_len, enum dma_data_direction direction,
+               unsigned long flags);
+       void (*device_terminate_all)(struct dma_chan *chan);
+
        enum dma_status (*device_is_tx_complete)(struct dma_chan *chan,
                        dma_cookie_t cookie, dma_cookie_t *last,
                        dma_cookie_t *used);
@@ -318,16 +375,14 @@ dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
 void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
        struct dma_chan *chan);
 
-static inline void
-async_tx_ack(struct dma_async_tx_descriptor *tx)
+static inline void async_tx_ack(struct dma_async_tx_descriptor *tx)
 {
        tx->flags |= DMA_CTRL_ACK;
 }
 
-static inline int
-async_tx_test_ack(struct dma_async_tx_descriptor *tx)
+static inline bool async_tx_test_ack(struct dma_async_tx_descriptor *tx)
 {
-       return tx->flags & DMA_CTRL_ACK;
+       return (tx->flags & DMA_CTRL_ACK) == DMA_CTRL_ACK;
 }
 
 #define first_dma_cap(mask) __first_dma_cap(&(mask))
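Putting the new slave pieces of this header together: a client describes its peripheral in a struct dma_slave, hangs it off its dma_client, requests the DMA_SLAVE capability, and builds transfers with device_prep_slave_sg() once a channel arrives. A hedged sketch against this client API; all names and register addresses below are invented:

/* hypothetical slave-DMA client -- addresses, names and callback are placeholders */
static struct dma_slave my_slave = {
	.tx_reg		= 0xfff02434,			/* assumed peripheral TX FIFO */
	.rx_reg		= 0xfff02430,			/* assumed peripheral RX FIFO */
	.reg_width	= DMA_SLAVE_WIDTH_32BIT,
};

static struct dma_client my_client = {
	.event_callback	= my_event_callback,		/* assumed channel-arrival handler */
	.slave		= &my_slave,
};

static void my_client_init(struct device *dev)
{
	my_slave.dev = dev;
	dma_cap_set(DMA_SLAVE, my_client.cap_mask);
	dma_async_client_register(&my_client);
	dma_async_client_chan_request(&my_client);
}

/* later, on a channel handed over through the event callback */
static struct dma_async_tx_descriptor *
my_prep_tx(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len)
{
	return chan->device->device_prep_slave_sg(chan, sgl, sg_len,
						  DMA_TO_DEVICE,
						  DMA_PREP_INTERRUPT);
}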
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
new file mode 100644 (file)
index 0000000..04d217b
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on
+ * AVR32 systems.)
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef DW_DMAC_H
+#define DW_DMAC_H
+
+#include <linux/dmaengine.h>
+
+/**
+ * struct dw_dma_platform_data - Controller configuration parameters
+ * @nr_channels: Number of channels supported by hardware (max 8)
+ */
+struct dw_dma_platform_data {
+       unsigned int    nr_channels;
+};
+
+/**
+ * struct dw_dma_slave - Controller-specific information about a slave
+ * @slave: Generic information about the slave
+ * @cfg_hi: Platform-specific initializer for the CFG_HI register
+ * @cfg_lo: Platform-specific initializer for the CFG_LO register
+ */
+struct dw_dma_slave {
+       struct dma_slave        slave;
+       u32                     cfg_hi;
+       u32                     cfg_lo;
+};
+
+/* Platform-configurable bits in CFG_HI */
+#define DWC_CFGH_FCMODE                (1 << 0)
+#define DWC_CFGH_FIFO_MODE     (1 << 1)
+#define DWC_CFGH_PROTCTL(x)    ((x) << 2)
+#define DWC_CFGH_SRC_PER(x)    ((x) << 7)
+#define DWC_CFGH_DST_PER(x)    ((x) << 11)
+
+/* Platform-configurable bits in CFG_LO */
+#define DWC_CFGL_PRIO(x)       ((x) << 5)      /* priority */
+#define DWC_CFGL_LOCK_CH_XFER  (0 << 12)       /* scope of LOCK_CH */
+#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12)
+#define DWC_CFGL_LOCK_CH_XACT  (2 << 12)
+#define DWC_CFGL_LOCK_BUS_XFER (0 << 14)       /* scope of LOCK_BUS */
+#define DWC_CFGL_LOCK_BUS_BLOCK        (1 << 14)
+#define DWC_CFGL_LOCK_BUS_XACT (2 << 14)
+#define DWC_CFGL_LOCK_CH       (1 << 15)       /* channel lockout */
+#define DWC_CFGL_LOCK_BUS      (1 << 16)       /* busmaster lockout */
+#define DWC_CFGL_HS_DST_POL    (1 << 18)       /* dst handshake active low */
+#define DWC_CFGL_HS_SRC_POL    (1 << 19)       /* src handshake active low */
+
+static inline struct dw_dma_slave *to_dw_dma_slave(struct dma_slave *slave)
+{
+       return container_of(slave, struct dw_dma_slave, slave);
+}
+
+#endif /* DW_DMAC_H */
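Tying this back to the AT32AP700x handshake numbers defined earlier in this series, platform code would wrap the generic dma_slave roughly like this; the register addresses are placeholders and the exact CFG bits a given peripheral needs are board-specific:

/* hypothetical platform-side slave description for an MMC-style peripheral */
static struct dw_dma_slave mci_dma_slave = {
	.slave = {
		.tx_reg		= 0xfff02400 + 0x34,	/* assumed TX data register */
		.rx_reg		= 0xfff02400 + 0x30,	/* assumed RX data register */
		.reg_width	= DMA_SLAVE_WIDTH_32BIT,
	},
	/* route the hardware handshake lines declared in at32ap700x.h */
	.cfg_hi	= DWC_CFGH_SRC_PER(DMAC_MCI_RX) | DWC_CFGH_DST_PER(DMAC_MCI_TX),
	.cfg_lo	= 0,
};

A driver handed the embedded generic slave pointer can then recover the controller-specific part with to_dw_dma_slave().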
index d8507eb394cf6f50e4c622b53d13e0c417b6467c..119ae7b8f028cb9795fddb16a28abc7530294764 100644 (file)
 #define PCI_DEVICE_ID_INTEL_ICH9_7     0x2916
 #define PCI_DEVICE_ID_INTEL_ICH9_8     0x2918
 #define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG4  0x3429
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG5  0x342a
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG6  0x342b
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG7  0x342c
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG0  0x3430
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG1  0x3431
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG2  0x3432
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG3  0x3433
 #define PCI_DEVICE_ID_INTEL_82830_HB   0x3575
 #define PCI_DEVICE_ID_INTEL_82830_CGC  0x3577
 #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580
index c77aff9c6eb3cc76fab911cdca097cc5add01d8c..8c6b706963ff01b02c87a54f2c70103e7f7f8f29 100644 (file)
@@ -34,6 +34,7 @@
 #define NET_DMA_DEFAULT_COPYBREAK 4096
 
 int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
+EXPORT_SYMBOL(sysctl_tcp_dma_copybreak);
 
 /**
  *     dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.