dmaengine: stm32-dma3: improve residue granularity
authorAmelie Delaunay <amelie.delaunay@foss.st.com>
Fri, 31 May 2024 15:07:09 +0000 (17:07 +0200)
committerVinod Koul <vkoul@kernel.org>
Tue, 11 Jun 2024 18:25:34 +0000 (23:55 +0530)
Implement own device_tx_status ops to compute the residue with a finer
granularity, up to bytes.
STM32 DMA3 has a bitfield, BNDT, in CxTR1 register which reflects the
number of bytes read from the source.
It also has a bitfield, FIFOL, in CxSR register which reflects the FIFO
level in units of programmed destination data width.
The channel is briefly suspended to get a coherent snapshot of registers.
It is possible to correct the fifo level when packing/unpacking is enabled
with destination increment.

Signed-off-by: Amelie Delaunay <amelie.delaunay@foss.st.com>
Link: https://lore.kernel.org/r/20240531150712.2503554-10-amelie.delaunay@foss.st.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
drivers/dma/stm32/stm32-dma3.c

index 8182d82501a365ed89b8b1f0adcac17e17ef1fd0..66d5129f4822a2e12308f32fa77d5a346877539e 100644 (file)
@@ -808,6 +808,134 @@ static void stm32_dma3_chan_reset(struct stm32_dma3_chan *chan)
        writel_relaxed(ccr |= CCR_RESET, ddata->base + STM32_DMA3_CCR(chan->id));
 }
 
+static int stm32_dma3_chan_get_curr_hwdesc(struct stm32_dma3_swdesc *swdesc, u32 cllr, u32 *residue)
+{
+       u32 i, lli_offset, next_lli_offset = cllr & CLLR_LA;
+
+       /* If cllr is null, it means it is either the last or single item */
+       if (!cllr)
+               return swdesc->lli_size - 1;
+
+       /* In cyclic mode, go fast and first check we are not on the last item */
+       if (swdesc->cyclic && next_lli_offset == (swdesc->lli[0].hwdesc_addr & CLLR_LA))
+               return swdesc->lli_size - 1;
+
+       /* As transfer is in progress, look backward from the last item */
+       for (i = swdesc->lli_size - 1; i > 0; i--) {
+               *residue += FIELD_GET(CBR1_BNDT, swdesc->lli[i].hwdesc->cbr1);
+               lli_offset = swdesc->lli[i].hwdesc_addr & CLLR_LA;
+               if (lli_offset == next_lli_offset)
+                       return i - 1;
+       }
+
+       return -EINVAL;
+}
+
+static void stm32_dma3_chan_set_residue(struct stm32_dma3_chan *chan,
+                                       struct stm32_dma3_swdesc *swdesc,
+                                       struct dma_tx_state *txstate)
+{
+       struct stm32_dma3_ddata *ddata = to_stm32_dma3_ddata(chan);
+       struct device *dev = chan2dev(chan);
+       struct stm32_dma3_hwdesc *hwdesc;
+       u32 residue, curr_lli, csr, cdar, cbr1, cllr, bndt, fifol;
+       bool pack_unpack;
+       int ret;
+
+       csr = readl_relaxed(ddata->base + STM32_DMA3_CSR(chan->id));
+       if (!(csr & CSR_IDLEF) && chan->dma_status != DMA_PAUSED) {
+               /* Suspend current transfer to read registers for a snapshot */
+               writel_relaxed(swdesc->ccr | CCR_SUSP, ddata->base + STM32_DMA3_CCR(chan->id));
+               ret = readl_relaxed_poll_timeout_atomic(ddata->base + STM32_DMA3_CSR(chan->id), csr,
+                                                       csr & (CSR_SUSPF | CSR_IDLEF), 1, 10);
+
+               if (ret || ((csr & CSR_TCF) && (csr & CSR_IDLEF))) {
+                       writel_relaxed(CFCR_SUSPF, ddata->base + STM32_DMA3_CFCR(chan->id));
+                       writel_relaxed(swdesc->ccr, ddata->base + STM32_DMA3_CCR(chan->id));
+                       if (ret)
+                               dev_err(dev, "Channel suspension timeout, csr=%08x\n", csr);
+               }
+       }
+
+       /* If channel is still active (CSR_IDLEF is not set), can't get a reliable residue */
+       if (!(csr & CSR_IDLEF))
+               dev_warn(dev, "Can't get residue: channel still active, csr=%08x\n", csr);
+
+       /*
+        * If channel is not suspended, but Idle and Transfer Complete are set,
+        * linked-list is over, no residue
+        */
+       if (!(csr & CSR_SUSPF) && (csr & CSR_TCF) && (csr & CSR_IDLEF))
+               return;
+
+       /* Read registers to have a snapshot */
+       cllr = readl_relaxed(ddata->base + STM32_DMA3_CLLR(chan->id));
+       cbr1 = readl_relaxed(ddata->base + STM32_DMA3_CBR1(chan->id));
+       cdar = readl_relaxed(ddata->base + STM32_DMA3_CDAR(chan->id));
+
+       /* Resume current transfer */
+       if (csr & CSR_SUSPF) {
+               writel_relaxed(CFCR_SUSPF, ddata->base + STM32_DMA3_CFCR(chan->id));
+               writel_relaxed(swdesc->ccr, ddata->base + STM32_DMA3_CCR(chan->id));
+       }
+
+       /* Add current BNDT */
+       bndt = FIELD_GET(CBR1_BNDT, cbr1);
+       residue = bndt;
+
+       /* Get current hwdesc and cumulate residue of pending hwdesc BNDT */
+       ret = stm32_dma3_chan_get_curr_hwdesc(swdesc, cllr, &residue);
+       if (ret < 0) {
+               dev_err(chan2dev(chan), "Can't get residue: current hwdesc not found\n");
+               return;
+       }
+       curr_lli = ret;
+
+       /* Read current FIFO level - in units of programmed destination data width */
+       hwdesc = swdesc->lli[curr_lli].hwdesc;
+       fifol = FIELD_GET(CSR_FIFOL, csr) * (1 << FIELD_GET(CTR1_DDW_LOG2, hwdesc->ctr1));
+       /* If the FIFO contains as many bytes as its size, it can't contain more */
+       if (fifol == (1 << (chan->fifo_size + 1)))
+               goto skip_fifol_update;
+
+       /*
+        * In case of PACKING (Destination burst length > Source burst length) or UNPACKING
+        * (Source burst length > Destination burst length), bytes could be pending in the FIFO
+        * (to be packed up to Destination burst length or unpacked into Destination burst length
+        * chunks).
+        * BNDT is not reliable, as it reflects the number of bytes read from the source but not the
+        * number of bytes written to the destination.
+        * FIFOL is also not sufficient, because it reflects the number of available write beats in
+        * units of Destination data width but not the bytes not yet packed or unpacked.
+        * In case of Destination increment DINC, it is possible to compute the number of bytes in
+        * the FIFO:
+        * fifol_in_bytes = bytes_read - bytes_written.
+        */
+       pack_unpack = !!(FIELD_GET(CTR1_PAM, hwdesc->ctr1) == CTR1_PAM_PACK_UNPACK);
+       if (pack_unpack && (hwdesc->ctr1 & CTR1_DINC)) {
+               int bytes_read = FIELD_GET(CBR1_BNDT, hwdesc->cbr1) - bndt;
+               int bytes_written = cdar - hwdesc->cdar;
+
+               if (bytes_read > 0)
+                       fifol = bytes_read - bytes_written;
+       }
+
+skip_fifol_update:
+       if (fifol) {
+               dev_dbg(chan2dev(chan), "%u byte(s) in the FIFO\n", fifol);
+               dma_set_in_flight_bytes(txstate, fifol);
+               /*
+                * Residue is already accurate for DMA_MEM_TO_DEV as BNDT reflects data read from
+                * the source memory buffer, so just need to add fifol to residue in case of
+                * DMA_DEV_TO_MEM transfer because these bytes are not yet written in destination
+                * memory buffer.
+                */
+               if (chan->dma_config.direction == DMA_DEV_TO_MEM)
+                       residue += fifol;
+       }
+       dma_set_residue(txstate, residue);
+}
+
 static int stm32_dma3_chan_stop(struct stm32_dma3_chan *chan)
 {
        struct stm32_dma3_ddata *ddata = to_stm32_dma3_ddata(chan);
@@ -1310,6 +1438,39 @@ static void stm32_dma3_synchronize(struct dma_chan *c)
        vchan_synchronize(&chan->vchan);
 }
 
+static enum dma_status stm32_dma3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+                                           struct dma_tx_state *txstate)
+{
+       struct stm32_dma3_chan *chan = to_stm32_dma3_chan(c);
+       struct stm32_dma3_swdesc *swdesc = NULL;
+       enum dma_status status;
+       unsigned long flags;
+       struct virt_dma_desc *vd;
+
+       status = dma_cookie_status(c, cookie, txstate);
+       if (status == DMA_COMPLETE)
+               return status;
+
+       if (!txstate)
+               return chan->dma_status;
+
+       spin_lock_irqsave(&chan->vchan.lock, flags);
+
+       vd = vchan_find_desc(&chan->vchan, cookie);
+       if (vd)
+               swdesc = to_stm32_dma3_swdesc(vd);
+       else if (chan->swdesc && chan->swdesc->vdesc.tx.cookie == cookie)
+               swdesc = chan->swdesc;
+
+       /* Get residue/in_flight_bytes only if a transfer is currently running (swdesc != NULL) */
+       if (swdesc)
+               stm32_dma3_chan_set_residue(chan, swdesc, txstate);
+
+       spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+       return chan->dma_status;
+}
+
 static void stm32_dma3_issue_pending(struct dma_chan *c)
 {
        struct stm32_dma3_chan *chan = to_stm32_dma3_chan(c);
@@ -1506,7 +1667,7 @@ static int stm32_dma3_probe(struct platform_device *pdev)
 
        dma_dev->descriptor_reuse = true;
        dma_dev->max_sg_burst = STM32_DMA3_MAX_SEG_SIZE;
-       dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+       dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
        dma_dev->device_alloc_chan_resources = stm32_dma3_alloc_chan_resources;
        dma_dev->device_free_chan_resources = stm32_dma3_free_chan_resources;
        dma_dev->device_prep_dma_memcpy = stm32_dma3_prep_dma_memcpy;
@@ -1518,7 +1679,7 @@ static int stm32_dma3_probe(struct platform_device *pdev)
        dma_dev->device_resume = stm32_dma3_resume;
        dma_dev->device_terminate_all = stm32_dma3_terminate_all;
        dma_dev->device_synchronize = stm32_dma3_synchronize;
-       dma_dev->device_tx_status = dma_cookie_status;
+       dma_dev->device_tx_status = stm32_dma3_tx_status;
        dma_dev->device_issue_pending = stm32_dma3_issue_pending;
 
        /* if dma_channels is not modified, get it from hwcfgr1 */