Merge tag 'dmaengine-fix-5.6-rc5' of git://git.infradead.org/users/vkoul/slave-dma
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 5 Mar 2020 18:19:34 +0000 (12:19 -0600)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 5 Mar 2020 18:19:34 +0000 (12:19 -0600)
Pull dmaengine fixes from Vinod Koul:
 "A bunch of driver fixes:

   - Doc updates to clean up warnings for dmaengine

   - Fixes for newly added Intel idxd driver

   - More fixes for newly added TI k3-udma driver

   - Fixes for IMX and Tegra drivers"

* tag 'dmaengine-fix-5.6-rc5' of git://git.infradead.org/users/vkoul/slave-dma:
  dmaengine: imx-sdma: Fix the event id check to include RX event for UART6
  dmaengine: tegra-apb: Prevent race conditions of tasklet vs free list
  dmaengine: tegra-apb: Fix use-after-free
  dmaengine: imx-sdma: fix context cache
  dmaengine: idxd: wq size configuration needs to check global max size
  dmaengine: idxd: sysfs input of wq incorrect wq type should return error
  dmaengine: coh901318: Fix a double lock bug in dma_tc_handle()
  dmaengine: idxd: correct reserved token calculation
  dmaengine: ti: k3-udma: Fix terminated transfer handling
  dmaengine: ti: k3-udma: Use the channel direction in pause/resume functions
  dmaengine: ti: k3-udma: Use the TR counter helper for slave_sg and cyclic
  dmaengine: ti: k3-udma: Move the TR counter calculation to helper function
  dmaengine: ti: k3-udma: Workaround for RX teardown with stale data in peer
  dmaengine: ti: k3-udma: Use ktime/usleep_range based TX completion check
  dmaengine: idxd: Fix error handling in idxd_wq_cdev_dev_setup()
  dmaengine: doc: fix warnings/issues of client.rst
  dmaengine: idxd: fix runaway module ref count on device driver bind

Documentation/driver-api/dmaengine/client.rst
drivers/dma/coh901318.c
drivers/dma/idxd/cdev.c
drivers/dma/idxd/sysfs.c
drivers/dma/imx-sdma.c
drivers/dma/tegra20-apb-dma.c
drivers/dma/ti/k3-udma.c

index e5953e7e4bf429a20afd44234efa48df8e2c1003..2104830a99aeff529f54df726db45451075ea86c 100644 (file)
@@ -151,8 +151,8 @@ The details of these operations are:
      Note that callbacks will always be invoked from the DMA
      engines tasklet, never from interrupt context.
 
-Optional: per descriptor metadata
----------------------------------
+  **Optional: per descriptor metadata**
+
   DMAengine provides two ways for metadata support.
 
   DESC_METADATA_CLIENT
@@ -199,12 +199,15 @@ Optional: per descriptor metadata
   DESC_METADATA_CLIENT
 
     - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM:
+
       1. prepare the descriptor (dmaengine_prep_*)
          construct the metadata in the client's buffer
       2. use dmaengine_desc_attach_metadata() to attach the buffer to the
          descriptor
       3. submit the transfer
+
     - DMA_DEV_TO_MEM:
+
       1. prepare the descriptor (dmaengine_prep_*)
       2. use dmaengine_desc_attach_metadata() to attach the buffer to the
          descriptor
@@ -215,6 +218,7 @@ Optional: per descriptor metadata
   DESC_METADATA_ENGINE
 
     - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM:
+
       1. prepare the descriptor (dmaengine_prep_*)
       2. use dmaengine_desc_get_metadata_ptr() to get the pointer to the
          engine's metadata area
@@ -222,7 +226,9 @@ Optional: per descriptor metadata
       4. use dmaengine_desc_set_metadata_len()  to tell the DMA engine the
          amount of data the client has placed into the metadata buffer
       5. submit the transfer
+
     - DMA_DEV_TO_MEM:
+
       1. prepare the descriptor (dmaengine_prep_*)
       2. submit the transfer
       3. on transfer completion, use dmaengine_desc_get_metadata_ptr() to get
@@ -278,8 +284,8 @@ Optional: per descriptor metadata
 
       void dma_async_issue_pending(struct dma_chan *chan);
 
-Further APIs:
--------------
+Further APIs
+------------
 
 1. Terminate APIs
 
index e51d836afcc7765d37ad5b084404aeebd13a8d69..1092d4ce723e436e7f3971bafcbeb084e505b12f 100644 (file)
@@ -1947,8 +1947,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc)
                return;
        }
 
-       spin_lock(&cohc->lock);
-
        /*
         * When we reach this point, at least one queue item
         * should have been moved over from cohc->queue to
@@ -1969,8 +1967,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc)
        if (coh901318_queue_start(cohc) == NULL)
                cohc->busy = 0;
 
-       spin_unlock(&cohc->lock);
-
        /*
         * This tasklet will remove items from cohc->active
         * and thus terminates them.
index 1d7347825b95a8a21d0fec1bda397a6093509ffc..df47be612ebb09f5b7c25fb1abb6767be190c093 100644 (file)
@@ -204,6 +204,7 @@ static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
        minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
        if (minor < 0) {
                rc = minor;
+               kfree(dev);
                goto ida_err;
        }
 
@@ -212,7 +213,6 @@ static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
        rc = device_register(dev);
        if (rc < 0) {
                dev_err(&idxd->pdev->dev, "device register failed\n");
-               put_device(dev);
                goto dev_reg_err;
        }
        idxd_cdev->minor = minor;
@@ -221,8 +221,8 @@ static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
 
  dev_reg_err:
        ida_simple_remove(&cdev_ctx->minor_ida, MINOR(dev->devt));
+       put_device(dev);
  ida_err:
-       kfree(dev);
        idxd_cdev->dev = NULL;
        return rc;
 }
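
The reordering above follows the driver-core rule that a struct device may only be freed with kfree() before device_register() has been called; once device_register() runs, even a failed registration must be unwound with put_device(), which frees the device through its release callback. A minimal sketch of that pattern (not part of this patch; the demo_* names and the bare struct device are hypothetical):

      static void demo_release(struct device *dev)
      {
              kfree(dev);
      }

      static int demo_setup(struct device *parent)
      {
              struct device *dev;
              int rc;

              dev = kzalloc(sizeof(*dev), GFP_KERNEL);
              if (!dev)
                      return -ENOMEM;

              dev->parent = parent;
              dev->release = demo_release;

              rc = dev_set_name(dev, "demo0");
              if (rc) {
                      kfree(dev);     /* still legal: device_register() not called yet */
                      return rc;
              }

              rc = device_register(dev);
              if (rc) {
                      /* from here on only put_device() may free dev */
                      put_device(dev);
                      return rc;
              }

              return 0;
      }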
index 6d907fe150aa4d508337b6f86d38da237f456318..6ca6e520a2fa4943e2b895f1816408de77d6c6e2 100644 (file)
@@ -124,6 +124,7 @@ static int idxd_config_bus_probe(struct device *dev)
                rc = idxd_device_config(idxd);
                if (rc < 0) {
                        spin_unlock_irqrestore(&idxd->dev_lock, flags);
+                       module_put(THIS_MODULE);
                        dev_warn(dev, "Device config failed: %d\n", rc);
                        return rc;
                }
@@ -132,6 +133,7 @@ static int idxd_config_bus_probe(struct device *dev)
                rc = idxd_device_enable(idxd);
                if (rc < 0) {
                        spin_unlock_irqrestore(&idxd->dev_lock, flags);
+                       module_put(THIS_MODULE);
                        dev_warn(dev, "Device enable failed: %d\n", rc);
                        return rc;
                }
@@ -142,6 +144,7 @@ static int idxd_config_bus_probe(struct device *dev)
                rc = idxd_register_dma_device(idxd);
                if (rc < 0) {
                        spin_unlock_irqrestore(&idxd->dev_lock, flags);
+                       module_put(THIS_MODULE);
                        dev_dbg(dev, "Failed to register dmaengine device\n");
                        return rc;
                }
@@ -516,7 +519,7 @@ static ssize_t group_tokens_reserved_store(struct device *dev,
        if (val > idxd->max_tokens)
                return -EINVAL;
 
-       if (val > idxd->nr_tokens)
+       if (val > idxd->nr_tokens + group->tokens_reserved)
                return -EINVAL;
 
        group->tokens_reserved = val;
@@ -901,6 +904,20 @@ static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr,
        return sprintf(buf, "%u\n", wq->size);
 }
 
+static int total_claimed_wq_size(struct idxd_device *idxd)
+{
+       int i;
+       int wq_size = 0;
+
+       for (i = 0; i < idxd->max_wqs; i++) {
+               struct idxd_wq *wq = &idxd->wqs[i];
+
+               wq_size += wq->size;
+       }
+
+       return wq_size;
+}
+
 static ssize_t wq_size_store(struct device *dev,
                             struct device_attribute *attr, const char *buf,
                             size_t count)
@@ -920,7 +937,7 @@ static ssize_t wq_size_store(struct device *dev,
        if (wq->state != IDXD_WQ_DISABLED)
                return -EPERM;
 
-       if (size > idxd->max_wq_size)
+       if (size + total_claimed_wq_size(idxd) - wq->size > idxd->max_wq_size)
                return -EINVAL;
 
        wq->size = size;
@@ -999,12 +1016,14 @@ static ssize_t wq_type_store(struct device *dev,
                return -EPERM;
 
        old_type = wq->type;
-       if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL]))
+       if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_NONE]))
+               wq->type = IDXD_WQT_NONE;
+       else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL]))
                wq->type = IDXD_WQT_KERNEL;
        else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_USER]))
                wq->type = IDXD_WQT_USER;
        else
-               wq->type = IDXD_WQT_NONE;
+               return -EINVAL;
 
        /* If we are changing queue type, clear the name */
        if (wq->type != old_type)
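
With total_claimed_wq_size() the size limit is enforced across all work queues instead of per queue. As a hypothetical example: on a device with max_wq_size = 128 where wq0 is already configured to 64 entries and wq1 to 32, total_claimed_wq_size() returns 96, so writing 32 to a third, still empty queue passes (32 + 96 - 0 = 128) while writing 64 is rejected (64 + 96 - 0 = 160 > 128); subtracting wq->size also lets an already sized queue be re-written with an equal or smaller value without tripping over its own allocation.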
index 066b21a3223261f26a73a71266d3b0a3e67bb51d..4d4477df4ede751a9b4277833d851dc48fe8641f 100644 (file)
@@ -1331,13 +1331,14 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 
        sdma_channel_synchronize(chan);
 
-       if (sdmac->event_id0)
+       if (sdmac->event_id0 >= 0)
                sdma_event_disable(sdmac, sdmac->event_id0);
        if (sdmac->event_id1)
                sdma_event_disable(sdmac, sdmac->event_id1);
 
        sdmac->event_id0 = 0;
        sdmac->event_id1 = 0;
+       sdmac->context_loaded = false;
 
        sdma_set_channel_priority(sdmac, 0);
 
@@ -1631,7 +1632,7 @@ static int sdma_config(struct dma_chan *chan,
        memcpy(&sdmac->slave_config, dmaengine_cfg, sizeof(*dmaengine_cfg));
 
        /* Set ENBLn earlier to make sure dma request triggered after that */
-       if (sdmac->event_id0) {
+       if (sdmac->event_id0 >= 0) {
                if (sdmac->event_id0 >= sdmac->sdma->drvdata->num_events)
                        return -EINVAL;
                sdma_event_enable(sdmac, sdmac->event_id0);
index 3a45079d11ecff40db8ea5b1824c55894a7429e7..4a750e29bfb533953584bc101f6e5eec99cc0239 100644 (file)
@@ -281,7 +281,7 @@ static struct tegra_dma_desc *tegra_dma_desc_get(
 
        /* Do not allocate if desc are waiting for ack */
        list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) {
-               if (async_tx_test_ack(&dma_desc->txd)) {
+               if (async_tx_test_ack(&dma_desc->txd) && !dma_desc->cb_count) {
                        list_del(&dma_desc->node);
                        spin_unlock_irqrestore(&tdc->lock, flags);
                        dma_desc->txd.flags = 0;
@@ -756,10 +756,6 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
        bool was_busy;
 
        spin_lock_irqsave(&tdc->lock, flags);
-       if (list_empty(&tdc->pending_sg_req)) {
-               spin_unlock_irqrestore(&tdc->lock, flags);
-               return 0;
-       }
 
        if (!tdc->busy)
                goto skip_dma_stop;
index ea79c2df28e01b628a8ac08b5af33c7d50a931d1..0536866a58cee788374f63cad40259b122541474 100644 (file)
@@ -5,6 +5,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/delay.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmapool.h>
@@ -96,6 +97,24 @@ struct udma_match_data {
        u32 level_start_idx[];
 };
 
+struct udma_hwdesc {
+       size_t cppi5_desc_size;
+       void *cppi5_desc_vaddr;
+       dma_addr_t cppi5_desc_paddr;
+
+       /* TR descriptor internal pointers */
+       void *tr_req_base;
+       struct cppi5_tr_resp_t *tr_resp_base;
+};
+
+struct udma_rx_flush {
+       struct udma_hwdesc hwdescs[2];
+
+       size_t buffer_size;
+       void *buffer_vaddr;
+       dma_addr_t buffer_paddr;
+};
+
 struct udma_dev {
        struct dma_device ddev;
        struct device *dev;
@@ -112,6 +131,8 @@ struct udma_dev {
        struct list_head desc_to_purge;
        spinlock_t lock;
 
+       struct udma_rx_flush rx_flush;
+
        int tchan_cnt;
        int echan_cnt;
        int rchan_cnt;
@@ -130,16 +151,6 @@ struct udma_dev {
        u32 psil_base;
 };
 
-struct udma_hwdesc {
-       size_t cppi5_desc_size;
-       void *cppi5_desc_vaddr;
-       dma_addr_t cppi5_desc_paddr;
-
-       /* TR descriptor internal pointers */
-       void *tr_req_base;
-       struct cppi5_tr_resp_t *tr_resp_base;
-};
-
 struct udma_desc {
        struct virt_dma_desc vd;
 
@@ -169,7 +180,7 @@ enum udma_chan_state {
 
 struct udma_tx_drain {
        struct delayed_work work;
-       unsigned long jiffie;
+       ktime_t tstamp;
        u32 residue;
 };
 
@@ -502,7 +513,7 @@ static bool udma_is_chan_paused(struct udma_chan *uc)
 {
        u32 val, pause_mask;
 
-       switch (uc->desc->dir) {
+       switch (uc->config.dir) {
        case DMA_DEV_TO_MEM:
                val = udma_rchanrt_read(uc->rchan,
                                        UDMA_RCHAN_RT_PEER_RT_EN_REG);
@@ -551,12 +562,17 @@ static void udma_sync_for_device(struct udma_chan *uc, int idx)
        }
 }
 
+static inline dma_addr_t udma_get_rx_flush_hwdesc_paddr(struct udma_chan *uc)
+{
+       return uc->ud->rx_flush.hwdescs[uc->config.pkt_mode].cppi5_desc_paddr;
+}
+
 static int udma_push_to_ring(struct udma_chan *uc, int idx)
 {
        struct udma_desc *d = uc->desc;
-
        struct k3_ring *ring = NULL;
-       int ret = -EINVAL;
+       dma_addr_t paddr;
+       int ret;
 
        switch (uc->config.dir) {
        case DMA_DEV_TO_MEM:
@@ -567,21 +583,37 @@ static int udma_push_to_ring(struct udma_chan *uc, int idx)
                ring = uc->tchan->t_ring;
                break;
        default:
-               break;
+               return -EINVAL;
        }
 
-       if (ring) {
-               dma_addr_t desc_addr = udma_curr_cppi5_desc_paddr(d, idx);
+       /* RX flush packet: idx == -1 is only passed in case of DEV_TO_MEM */
+       if (idx == -1) {
+               paddr = udma_get_rx_flush_hwdesc_paddr(uc);
+       } else {
+               paddr = udma_curr_cppi5_desc_paddr(d, idx);
 
                wmb(); /* Ensure that writes are not moved over this point */
                udma_sync_for_device(uc, idx);
-               ret = k3_ringacc_ring_push(ring, &desc_addr);
-               uc->in_ring_cnt++;
        }
 
+       ret = k3_ringacc_ring_push(ring, &paddr);
+       if (!ret)
+               uc->in_ring_cnt++;
+
        return ret;
 }
 
+static bool udma_desc_is_rx_flush(struct udma_chan *uc, dma_addr_t addr)
+{
+       if (uc->config.dir != DMA_DEV_TO_MEM)
+               return false;
+
+       if (addr == udma_get_rx_flush_hwdesc_paddr(uc))
+               return true;
+
+       return false;
+}
+
 static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr)
 {
        struct k3_ring *ring = NULL;
@@ -610,6 +642,10 @@ static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr)
                if (cppi5_desc_is_tdcm(*addr))
                        return ret;
 
+               /* Check for flush descriptor */
+               if (udma_desc_is_rx_flush(uc, *addr))
+                       return -ENOENT;
+
                d = udma_udma_desc_from_paddr(uc, *addr);
 
                if (d)
@@ -890,6 +926,9 @@ static int udma_stop(struct udma_chan *uc)
 
        switch (uc->config.dir) {
        case DMA_DEV_TO_MEM:
+               if (!uc->cyclic && !uc->desc)
+                       udma_push_to_ring(uc, -1);
+
                udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG,
                                   UDMA_PEER_RT_EN_ENABLE |
                                   UDMA_PEER_RT_EN_TEARDOWN);
@@ -946,9 +985,10 @@ static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
        peer_bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG);
        bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_BCNT_REG);
 
+       /* Transfer is incomplete, store current residue and time stamp */
        if (peer_bcnt < bcnt) {
                uc->tx_drain.residue = bcnt - peer_bcnt;
-               uc->tx_drain.jiffie = jiffies;
+               uc->tx_drain.tstamp = ktime_get();
                return false;
        }
 
@@ -961,35 +1001,59 @@ static void udma_check_tx_completion(struct work_struct *work)
                                            tx_drain.work.work);
        bool desc_done = true;
        u32 residue_diff;
-       unsigned long jiffie_diff, delay;
+       ktime_t time_diff;
+       unsigned long delay;
+
+       while (1) {
+               if (uc->desc) {
+                       /* Get previous residue and time stamp */
+                       residue_diff = uc->tx_drain.residue;
+                       time_diff = uc->tx_drain.tstamp;
+                       /*
+                        * Get current residue and time stamp or see if
+                        * transfer is complete
+                        */
+                       desc_done = udma_is_desc_really_done(uc, uc->desc);
+               }
 
-       if (uc->desc) {
-               residue_diff = uc->tx_drain.residue;
-               jiffie_diff = uc->tx_drain.jiffie;
-               desc_done = udma_is_desc_really_done(uc, uc->desc);
-       }
-
-       if (!desc_done) {
-               jiffie_diff = uc->tx_drain.jiffie - jiffie_diff;
-               residue_diff -= uc->tx_drain.residue;
-               if (residue_diff) {
-                       /* Try to guess when we should check next time */
-                       residue_diff /= jiffie_diff;
-                       delay = uc->tx_drain.residue / residue_diff / 3;
-                       if (jiffies_to_msecs(delay) < 5)
-                               delay = 0;
-               } else {
-                       /* No progress, check again in 1 second  */
-                       delay = HZ;
+               if (!desc_done) {
+                       /*
+                        * Find the time delta and residue delta w.r.t
+                        * previous poll
+                        */
+                       time_diff = ktime_sub(uc->tx_drain.tstamp,
+                                             time_diff) + 1;
+                       residue_diff -= uc->tx_drain.residue;
+                       if (residue_diff) {
+                               /*
+                                * Try to guess when we should check
+                                * next time by calculating rate at
+                                * which data is being drained at the
+                                * peer device
+                                */
+                               delay = (time_diff / residue_diff) *
+                                       uc->tx_drain.residue;
+                       } else {
+                               /* No progress, check again in 1 second  */
+                               schedule_delayed_work(&uc->tx_drain.work, HZ);
+                               break;
+                       }
+
+                       usleep_range(ktime_to_us(delay),
+                                    ktime_to_us(delay) + 10);
+                       continue;
                }
 
-               schedule_delayed_work(&uc->tx_drain.work, delay);
-       } else if (uc->desc) {
-               struct udma_desc *d = uc->desc;
+               if (uc->desc) {
+                       struct udma_desc *d = uc->desc;
 
-               uc->bcnt += d->residue;
-               udma_start(uc);
-               vchan_cookie_complete(&d->vd);
+                       uc->bcnt += d->residue;
+                       udma_start(uc);
+                       vchan_cookie_complete(&d->vd);
+                       break;
+               }
+
+               break;
        }
 }
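
In numbers, the rescheduling logic above estimates when the peer should have drained the remaining data: if, for instance, a poll 300 us after the previous one finds that 1200 bytes were consumed and 4000 bytes of residue remain, delay becomes (300000 ns / 1200) * 4000 = 1 ms and the worker usleep_range()s for roughly that long before re-checking; only when no progress at all is observed does it fall back to re-scheduling the delayed work one second later (the figures are illustrative only).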
 
@@ -1033,29 +1097,27 @@ static irqreturn_t udma_ring_irq_handler(int irq, void *data)
                        goto out;
                }
 
-               if (uc->cyclic) {
-                       /* push the descriptor back to the ring */
-                       if (d == uc->desc) {
+               if (d == uc->desc) {
+                       /* active descriptor */
+                       if (uc->cyclic) {
                                udma_cyclic_packet_elapsed(uc);
                                vchan_cyclic_callback(&d->vd);
-                       }
-               } else {
-                       bool desc_done = false;
-
-                       if (d == uc->desc) {
-                               desc_done = udma_is_desc_really_done(uc, d);
-
-                               if (desc_done) {
+                       } else {
+                               if (udma_is_desc_really_done(uc, d)) {
                                        uc->bcnt += d->residue;
                                        udma_start(uc);
+                                       vchan_cookie_complete(&d->vd);
                                } else {
                                        schedule_delayed_work(&uc->tx_drain.work,
                                                              0);
                                }
                        }
-
-                       if (desc_done)
-                               vchan_cookie_complete(&d->vd);
+               } else {
+                       /*
+                        * terminated descriptor, mark the descriptor as
+                        * completed to update the channel's cookie marker
+                        */
+                       dma_cookie_complete(&d->vd.tx);
                }
        }
 out:
@@ -1965,36 +2027,81 @@ static struct udma_desc *udma_alloc_tr_desc(struct udma_chan *uc,
        return d;
 }
 
+/**
+ * udma_get_tr_counters - calculate TR counters for a given length
+ * @len: Length of the transfer
+ * @align_to: Preferred alignment
+ * @tr0_cnt0: First TR icnt0
+ * @tr0_cnt1: First TR icnt1
+ * @tr1_cnt0: Second (if used) TR icnt0
+ *
+ * For len < SZ_64K only one TR is enough, tr1_cnt0 is not updated
+ * For len >= SZ_64K two TRs are used in a simple way:
+ * First TR: SZ_64K-alignment blocks (tr0_cnt0, tr0_cnt1)
+ * Second TR: the remaining length (tr1_cnt0)
+ *
+ * Returns the number of TRs the length needs (1 or 2)
+ * -EINVAL if the length cannot be supported
+ */
+static int udma_get_tr_counters(size_t len, unsigned long align_to,
+                               u16 *tr0_cnt0, u16 *tr0_cnt1, u16 *tr1_cnt0)
+{
+       if (len < SZ_64K) {
+               *tr0_cnt0 = len;
+               *tr0_cnt1 = 1;
+
+               return 1;
+       }
+
+       if (align_to > 3)
+               align_to = 3;
+
+realign:
+       *tr0_cnt0 = SZ_64K - BIT(align_to);
+       if (len / *tr0_cnt0 >= SZ_64K) {
+               if (align_to) {
+                       align_to--;
+                       goto realign;
+               }
+               return -EINVAL;
+       }
+
+       *tr0_cnt1 = len / *tr0_cnt0;
+       *tr1_cnt0 = len % *tr0_cnt0;
+
+       return 2;
+}
+
 static struct udma_desc *
 udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl,
                      unsigned int sglen, enum dma_transfer_direction dir,
                      unsigned long tx_flags, void *context)
 {
-       enum dma_slave_buswidth dev_width;
        struct scatterlist *sgent;
        struct udma_desc *d;
-       size_t tr_size;
        struct cppi5_tr_type1_t *tr_req = NULL;
+       u16 tr0_cnt0, tr0_cnt1, tr1_cnt0;
        unsigned int i;
-       u32 burst;
+       size_t tr_size;
+       int num_tr = 0;
+       int tr_idx = 0;
 
-       if (dir == DMA_DEV_TO_MEM) {
-               dev_width = uc->cfg.src_addr_width;
-               burst = uc->cfg.src_maxburst;
-       } else if (dir == DMA_MEM_TO_DEV) {
-               dev_width = uc->cfg.dst_addr_width;
-               burst = uc->cfg.dst_maxburst;
-       } else {
-               dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+       if (!is_slave_direction(dir)) {
+               dev_err(uc->ud->dev, "Only slave cyclic is supported\n");
                return NULL;
        }
 
-       if (!burst)
-               burst = 1;
+       /* estimate the number of TRs we will need */
+       for_each_sg(sgl, sgent, sglen, i) {
+               if (sg_dma_len(sgent) < SZ_64K)
+                       num_tr++;
+               else
+                       num_tr += 2;
+       }
 
        /* Now allocate and setup the descriptor. */
        tr_size = sizeof(struct cppi5_tr_type1_t);
-       d = udma_alloc_tr_desc(uc, tr_size, sglen, dir);
+       d = udma_alloc_tr_desc(uc, tr_size, num_tr, dir);
        if (!d)
                return NULL;
 
@@ -2002,19 +2109,46 @@ udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl,
 
        tr_req = d->hwdesc[0].tr_req_base;
        for_each_sg(sgl, sgent, sglen, i) {
-               d->residue += sg_dma_len(sgent);
+               dma_addr_t sg_addr = sg_dma_address(sgent);
+
+               num_tr = udma_get_tr_counters(sg_dma_len(sgent), __ffs(sg_addr),
+                                             &tr0_cnt0, &tr0_cnt1, &tr1_cnt0);
+               if (num_tr < 0) {
+                       dev_err(uc->ud->dev, "size %u is not supported\n",
+                               sg_dma_len(sgent));
+                       udma_free_hwdesc(uc, d);
+                       kfree(d);
+                       return NULL;
+               }
 
                cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false,
                              CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
                cppi5_tr_csf_set(&tr_req[i].flags, CPPI5_TR_CSF_SUPR_EVT);
 
-               tr_req[i].addr = sg_dma_address(sgent);
-               tr_req[i].icnt0 = burst * dev_width;
-               tr_req[i].dim1 = burst * dev_width;
-               tr_req[i].icnt1 = sg_dma_len(sgent) / tr_req[i].icnt0;
+               tr_req[tr_idx].addr = sg_addr;
+               tr_req[tr_idx].icnt0 = tr0_cnt0;
+               tr_req[tr_idx].icnt1 = tr0_cnt1;
+               tr_req[tr_idx].dim1 = tr0_cnt0;
+               tr_idx++;
+
+               if (num_tr == 2) {
+                       cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1,
+                                     false, false,
+                                     CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+                       cppi5_tr_csf_set(&tr_req[tr_idx].flags,
+                                        CPPI5_TR_CSF_SUPR_EVT);
+
+                       tr_req[tr_idx].addr = sg_addr + tr0_cnt1 * tr0_cnt0;
+                       tr_req[tr_idx].icnt0 = tr1_cnt0;
+                       tr_req[tr_idx].icnt1 = 1;
+                       tr_req[tr_idx].dim1 = tr1_cnt0;
+                       tr_idx++;
+               }
+
+               d->residue += sg_dma_len(sgent);
        }
 
-       cppi5_tr_csf_set(&tr_req[i - 1].flags, CPPI5_TR_CSF_EOP);
+       cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags, CPPI5_TR_CSF_EOP);
 
        return d;
 }
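
A worked example of the new udma_get_tr_counters() helper used above (hypothetical numbers): for an SG entry of 200000 bytes whose DMA address has bit 3 as its lowest set bit, align_to is 3, so tr0_cnt0 = SZ_64K - BIT(3) = 65528, tr0_cnt1 = 200000 / 65528 = 3 and tr1_cnt0 = 200000 % 65528 = 3416; the first TR then moves three 65528-byte blocks, the second TR the remaining 3416 bytes, and the helper returns 2. Entries shorter than 64 KiB keep using a single TR with icnt0 = len and icnt1 = 1.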
@@ -2319,47 +2453,66 @@ udma_prep_dma_cyclic_tr(struct udma_chan *uc, dma_addr_t buf_addr,
                        size_t buf_len, size_t period_len,
                        enum dma_transfer_direction dir, unsigned long flags)
 {
-       enum dma_slave_buswidth dev_width;
        struct udma_desc *d;
-       size_t tr_size;
+       size_t tr_size, period_addr;
        struct cppi5_tr_type1_t *tr_req;
-       unsigned int i;
        unsigned int periods = buf_len / period_len;
-       u32 burst;
+       u16 tr0_cnt0, tr0_cnt1, tr1_cnt0;
+       unsigned int i;
+       int num_tr;
 
-       if (dir == DMA_DEV_TO_MEM) {
-               dev_width = uc->cfg.src_addr_width;
-               burst = uc->cfg.src_maxburst;
-       } else if (dir == DMA_MEM_TO_DEV) {
-               dev_width = uc->cfg.dst_addr_width;
-               burst = uc->cfg.dst_maxburst;
-       } else {
-               dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+       if (!is_slave_direction(dir)) {
+               dev_err(uc->ud->dev, "Only slave cyclic is supported\n");
                return NULL;
        }
 
-       if (!burst)
-               burst = 1;
+       num_tr = udma_get_tr_counters(period_len, __ffs(buf_addr), &tr0_cnt0,
+                                     &tr0_cnt1, &tr1_cnt0);
+       if (num_tr < 0) {
+               dev_err(uc->ud->dev, "size %zu is not supported\n",
+                       period_len);
+               return NULL;
+       }
 
        /* Now allocate and setup the descriptor. */
        tr_size = sizeof(struct cppi5_tr_type1_t);
-       d = udma_alloc_tr_desc(uc, tr_size, periods, dir);
+       d = udma_alloc_tr_desc(uc, tr_size, periods * num_tr, dir);
        if (!d)
                return NULL;
 
        tr_req = d->hwdesc[0].tr_req_base;
+       period_addr = buf_addr;
        for (i = 0; i < periods; i++) {
-               cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false,
-                             CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+               int tr_idx = i * num_tr;
 
-               tr_req[i].addr = buf_addr + period_len * i;
-               tr_req[i].icnt0 = dev_width;
-               tr_req[i].icnt1 = period_len / dev_width;
-               tr_req[i].dim1 = dev_width;
+               cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, false,
+                             false, CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+
+               tr_req[tr_idx].addr = period_addr;
+               tr_req[tr_idx].icnt0 = tr0_cnt0;
+               tr_req[tr_idx].icnt1 = tr0_cnt1;
+               tr_req[tr_idx].dim1 = tr0_cnt0;
+
+               if (num_tr == 2) {
+                       cppi5_tr_csf_set(&tr_req[tr_idx].flags,
+                                        CPPI5_TR_CSF_SUPR_EVT);
+                       tr_idx++;
+
+                       cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1,
+                                     false, false,
+                                     CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+
+                       tr_req[tr_idx].addr = period_addr + tr0_cnt1 * tr0_cnt0;
+                       tr_req[tr_idx].icnt0 = tr1_cnt0;
+                       tr_req[tr_idx].icnt1 = 1;
+                       tr_req[tr_idx].dim1 = tr1_cnt0;
+               }
 
                if (!(flags & DMA_PREP_INTERRUPT))
-                       cppi5_tr_csf_set(&tr_req[i].flags,
+                       cppi5_tr_csf_set(&tr_req[tr_idx].flags,
                                         CPPI5_TR_CSF_SUPR_EVT);
+
+               period_addr += period_len;
        }
 
        return d;
@@ -2517,29 +2670,12 @@ udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
                return NULL;
        }
 
-       if (len < SZ_64K) {
-               num_tr = 1;
-               tr0_cnt0 = len;
-               tr0_cnt1 = 1;
-       } else {
-               unsigned long align_to = __ffs(src | dest);
-
-               if (align_to > 3)
-                       align_to = 3;
-               /*
-                * Keep simple: tr0: SZ_64K-alignment blocks,
-                *              tr1: the remaining
-                */
-               num_tr = 2;
-               tr0_cnt0 = (SZ_64K - BIT(align_to));
-               if (len / tr0_cnt0 >= SZ_64K) {
-                       dev_err(uc->ud->dev, "size %zu is not supported\n",
-                               len);
-                       return NULL;
-               }
-
-               tr0_cnt1 = len / tr0_cnt0;
-               tr1_cnt0 = len % tr0_cnt0;
+       num_tr = udma_get_tr_counters(len, __ffs(src | dest), &tr0_cnt0,
+                                     &tr0_cnt1, &tr1_cnt0);
+       if (num_tr < 0) {
+               dev_err(uc->ud->dev, "size %zu is not supported\n",
+                       len);
+               return NULL;
        }
 
        d = udma_alloc_tr_desc(uc, tr_size, num_tr, DMA_MEM_TO_MEM);
@@ -2631,6 +2767,9 @@ static enum dma_status udma_tx_status(struct dma_chan *chan,
 
        ret = dma_cookie_status(chan, cookie, txstate);
 
+       if (!udma_is_chan_running(uc))
+               ret = DMA_COMPLETE;
+
        if (ret == DMA_IN_PROGRESS && udma_is_chan_paused(uc))
                ret = DMA_PAUSED;
 
@@ -2697,11 +2836,8 @@ static int udma_pause(struct dma_chan *chan)
 {
        struct udma_chan *uc = to_udma_chan(chan);
 
-       if (!uc->desc)
-               return -EINVAL;
-
        /* pause the channel */
-       switch (uc->desc->dir) {
+       switch (uc->config.dir) {
        case DMA_DEV_TO_MEM:
                udma_rchanrt_update_bits(uc->rchan,
                                         UDMA_RCHAN_RT_PEER_RT_EN_REG,
@@ -2730,11 +2866,8 @@ static int udma_resume(struct dma_chan *chan)
 {
        struct udma_chan *uc = to_udma_chan(chan);
 
-       if (!uc->desc)
-               return -EINVAL;
-
        /* resume the channel */
-       switch (uc->desc->dir) {
+       switch (uc->config.dir) {
        case DMA_DEV_TO_MEM:
                udma_rchanrt_update_bits(uc->rchan,
                                         UDMA_RCHAN_RT_PEER_RT_EN_REG,
@@ -3248,6 +3381,98 @@ static int udma_setup_resources(struct udma_dev *ud)
        return ch_count;
 }
 
+static int udma_setup_rx_flush(struct udma_dev *ud)
+{
+       struct udma_rx_flush *rx_flush = &ud->rx_flush;
+       struct cppi5_desc_hdr_t *tr_desc;
+       struct cppi5_tr_type1_t *tr_req;
+       struct cppi5_host_desc_t *desc;
+       struct device *dev = ud->dev;
+       struct udma_hwdesc *hwdesc;
+       size_t tr_size;
+
+       /* Allocate 1K buffer for discarded data on RX channel teardown */
+       rx_flush->buffer_size = SZ_1K;
+       rx_flush->buffer_vaddr = devm_kzalloc(dev, rx_flush->buffer_size,
+                                             GFP_KERNEL);
+       if (!rx_flush->buffer_vaddr)
+               return -ENOMEM;
+
+       rx_flush->buffer_paddr = dma_map_single(dev, rx_flush->buffer_vaddr,
+                                               rx_flush->buffer_size,
+                                               DMA_TO_DEVICE);
+       if (dma_mapping_error(dev, rx_flush->buffer_paddr))
+               return -ENOMEM;
+
+       /* Set up descriptor to be used for TR mode */
+       hwdesc = &rx_flush->hwdescs[0];
+       tr_size = sizeof(struct cppi5_tr_type1_t);
+       hwdesc->cppi5_desc_size = cppi5_trdesc_calc_size(tr_size, 1);
+       hwdesc->cppi5_desc_size = ALIGN(hwdesc->cppi5_desc_size,
+                                       ud->desc_align);
+
+       hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size,
+                                               GFP_KERNEL);
+       if (!hwdesc->cppi5_desc_vaddr)
+               return -ENOMEM;
+
+       hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr,
+                                                 hwdesc->cppi5_desc_size,
+                                                 DMA_TO_DEVICE);
+       if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr))
+               return -ENOMEM;
+
+       /* Start of the TR req records */
+       hwdesc->tr_req_base = hwdesc->cppi5_desc_vaddr + tr_size;
+       /* Start address of the TR response array */
+       hwdesc->tr_resp_base = hwdesc->tr_req_base + tr_size;
+
+       tr_desc = hwdesc->cppi5_desc_vaddr;
+       cppi5_trdesc_init(tr_desc, 1, tr_size, 0, 0);
+       cppi5_desc_set_pktids(tr_desc, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+       cppi5_desc_set_retpolicy(tr_desc, 0, 0);
+
+       tr_req = hwdesc->tr_req_base;
+       cppi5_tr_init(&tr_req->flags, CPPI5_TR_TYPE1, false, false,
+                     CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+       cppi5_tr_csf_set(&tr_req->flags, CPPI5_TR_CSF_SUPR_EVT);
+
+       tr_req->addr = rx_flush->buffer_paddr;
+       tr_req->icnt0 = rx_flush->buffer_size;
+       tr_req->icnt1 = 1;
+
+       /* Set up descriptor to be used for packet mode */
+       hwdesc = &rx_flush->hwdescs[1];
+       hwdesc->cppi5_desc_size = ALIGN(sizeof(struct cppi5_host_desc_t) +
+                                       CPPI5_INFO0_HDESC_EPIB_SIZE +
+                                       CPPI5_INFO0_HDESC_PSDATA_MAX_SIZE,
+                                       ud->desc_align);
+
+       hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size,
+                                               GFP_KERNEL);
+       if (!hwdesc->cppi5_desc_vaddr)
+               return -ENOMEM;
+
+       hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr,
+                                                 hwdesc->cppi5_desc_size,
+                                                 DMA_TO_DEVICE);
+       if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr))
+               return -ENOMEM;
+
+       desc = hwdesc->cppi5_desc_vaddr;
+       cppi5_hdesc_init(desc, 0, 0);
+       cppi5_desc_set_pktids(&desc->hdr, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+       cppi5_desc_set_retpolicy(&desc->hdr, 0, 0);
+
+       cppi5_hdesc_attach_buf(desc,
+                              rx_flush->buffer_paddr, rx_flush->buffer_size,
+                              rx_flush->buffer_paddr, rx_flush->buffer_size);
+
+       dma_sync_single_for_device(dev, hwdesc->cppi5_desc_paddr,
+                                  hwdesc->cppi5_desc_size, DMA_TO_DEVICE);
+       return 0;
+}
+
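
This setup pairs with the udma_stop() change earlier in the patch: for a DMA_DEV_TO_MEM channel with no active descriptor, udma_push_to_ring(uc, -1) queues one of these two pre-built flush descriptors (TR or packet mode, selected by uc->config.pkt_mode) before teardown, so any stale data still sitting in the peer is drained into the throwaway 1 KiB buffer; udma_pop_from_ring() later recognizes the descriptor by its address via udma_desc_is_rx_flush() and discards it.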
 #define TI_UDMAC_BUSWIDTHS     (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
                                 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
                                 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
@@ -3361,6 +3586,10 @@ static int udma_probe(struct platform_device *pdev)
        if (ud->desc_align < dma_get_cache_alignment())
                ud->desc_align = dma_get_cache_alignment();
 
+       ret = udma_setup_rx_flush(ud);
+       if (ret)
+               return ret;
+
        for (i = 0; i < ud->tchan_cnt; i++) {
                struct udma_tchan *tchan = &ud->tchans[i];