tty: serial: fsl_lpuart: Use scatter/gather DMA for Tx
[linux-2.6-block.git] / drivers / tty / serial / fsl_lpuart.c
index 7f95f782a48561e5ef466230c1a54dc8cf17455d..93e7589893d2cdeef951b499fb6227af953b6710 100644 (file)
 #define UARTWATER_TXWATER_OFF  0
 #define UARTWATER_RXWATER_OFF  16
 
-#define FSL_UART_RX_DMA_BUFFER_SIZE    64
+/* Rx DMA timeout in ms, which is used to calculate Rx ring buffer size */
+#define DMA_RX_TIMEOUT         (10)
 
 #define DRIVER_NAME    "fsl-lpuart"
 #define DEV_NAME       "ttyLP"
@@ -243,18 +244,18 @@ struct lpuart_port {
        struct dma_chan         *dma_rx_chan;
        struct dma_async_tx_descriptor  *dma_tx_desc;
        struct dma_async_tx_descriptor  *dma_rx_desc;
-       dma_addr_t              dma_tx_buf_bus;
-       dma_addr_t              dma_rx_buf_bus;
        dma_cookie_t            dma_tx_cookie;
        dma_cookie_t            dma_rx_cookie;
-       unsigned char           *dma_tx_buf_virt;
-       unsigned char           *dma_rx_buf_virt;
        unsigned int            dma_tx_bytes;
        unsigned int            dma_rx_bytes;
-       int                     dma_tx_in_progress;
-       int                     dma_rx_in_progress;
+       bool                    dma_tx_in_progress;
        unsigned int            dma_rx_timeout;
        struct timer_list       lpuart_timer;
+       struct scatterlist      rx_sgl, tx_sgl[2];
+       struct circ_buf         rx_ring;
+       int                     rx_dma_rng_buf_len;
+       unsigned int            dma_tx_nents;
+       wait_queue_head_t       dma_wait;
 };
 
 static const struct of_device_id lpuart_dt_ids[] = {
@@ -270,7 +271,6 @@ MODULE_DEVICE_TABLE(of, lpuart_dt_ids);
 
 /* Forward declare this for the dma callbacks*/
 static void lpuart_dma_tx_complete(void *arg);
-static void lpuart_dma_rx_complete(void *arg);
 
 static u32 lpuart32_read(void __iomem *addr)
 {
@@ -316,141 +316,103 @@ static void lpuart32_stop_rx(struct uart_port *port)
        lpuart32_write(temp & ~UARTCTRL_RE, port->membase + UARTCTRL);
 }
 
-static void lpuart_copy_rx_to_tty(struct lpuart_port *sport,
-               struct tty_port *tty, int count)
+static void lpuart_dma_tx(struct lpuart_port *sport)
 {
-       int copied;
-
-       sport->port.icount.rx += count;
+       struct circ_buf *xmit = &sport->port.state->xmit;
+       struct scatterlist *sgl = sport->tx_sgl;
+       struct device *dev = sport->port.dev;
+       int ret;
 
-       if (!tty) {
-               dev_err(sport->port.dev, "No tty port\n");
+       if (sport->dma_tx_in_progress)
                return;
-       }
 
-       dma_sync_single_for_cpu(sport->port.dev, sport->dma_rx_buf_bus,
-                       FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
-       copied = tty_insert_flip_string(tty,
-                       ((unsigned char *)(sport->dma_rx_buf_virt)), count);
+       sport->dma_tx_bytes = uart_circ_chars_pending(xmit);
 
-       if (copied != count) {
-               WARN_ON(1);
-               dev_err(sport->port.dev, "RxData copy to tty layer failed\n");
+       /*
+        * When head has wrapped around to 0 the pending data is one
+        * contiguous chunk (tail .. end of buffer), so a single sg entry
+        * suffices; the two-entry path would create an invalid
+        * zero-length second entry in that case.
+        */
+       if (xmit->tail < xmit->head || xmit->head == 0) {
+               sport->dma_tx_nents = 1;
+               sg_init_one(sgl, xmit->buf + xmit->tail, sport->dma_tx_bytes);
+       } else {
+               sport->dma_tx_nents = 2;
+               sg_init_table(sgl, 2);
+               sg_set_buf(sgl, xmit->buf + xmit->tail,
+                               UART_XMIT_SIZE - xmit->tail);
+               sg_set_buf(sgl + 1, xmit->buf, xmit->head);
        }
 
-       dma_sync_single_for_device(sport->port.dev, sport->dma_rx_buf_bus,
-                       FSL_UART_RX_DMA_BUFFER_SIZE, DMA_TO_DEVICE);
-}
-
-static void lpuart_pio_tx(struct lpuart_port *sport)
-{
-       struct circ_buf *xmit = &sport->port.state->xmit;
-       unsigned long flags;
-
-       spin_lock_irqsave(&sport->port.lock, flags);
-
-       while (!uart_circ_empty(xmit) &&
-               readb(sport->port.membase + UARTTCFIFO) < sport->txfifo_size) {
-               writeb(xmit->buf[xmit->tail], sport->port.membase + UARTDR);
-               xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-               sport->port.icount.tx++;
+       ret = dma_map_sg(dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE);
+       if (!ret) {
+               dev_err(dev, "DMA mapping error for TX.\n");
+               return;
        }
 
-       if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-               uart_write_wakeup(&sport->port);
-
-       if (uart_circ_empty(xmit))
-               writeb(readb(sport->port.membase + UARTCR5) | UARTCR5_TDMAS,
-                       sport->port.membase + UARTCR5);
-
-       spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
-static int lpuart_dma_tx(struct lpuart_port *sport, unsigned long count)
-{
-       struct circ_buf *xmit = &sport->port.state->xmit;
-       dma_addr_t tx_bus_addr;
-
-       dma_sync_single_for_device(sport->port.dev, sport->dma_tx_buf_bus,
-                               UART_XMIT_SIZE, DMA_TO_DEVICE);
-       sport->dma_tx_bytes = count & ~(sport->txfifo_size - 1);
-       tx_bus_addr = sport->dma_tx_buf_bus + xmit->tail;
-       sport->dma_tx_desc = dmaengine_prep_slave_single(sport->dma_tx_chan,
-                                       tx_bus_addr, sport->dma_tx_bytes,
+       sport->dma_tx_desc = dmaengine_prep_slave_sg(sport->dma_tx_chan, sgl,
+                                       sport->dma_tx_nents,
                                        DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
-
        if (!sport->dma_tx_desc) {
-               dev_err(sport->port.dev, "Not able to get desc for tx\n");
-               return -EIO;
+               dma_unmap_sg(dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE);
+               dev_err(dev, "Cannot prepare TX slave DMA!\n");
+               return;
        }
 
        sport->dma_tx_desc->callback = lpuart_dma_tx_complete;
        sport->dma_tx_desc->callback_param = sport;
-       sport->dma_tx_in_progress = 1;
+       sport->dma_tx_in_progress = true;
        sport->dma_tx_cookie = dmaengine_submit(sport->dma_tx_desc);
        dma_async_issue_pending(sport->dma_tx_chan);
 
-       return 0;
-}
-
-static void lpuart_prepare_tx(struct lpuart_port *sport)
-{
-       struct circ_buf *xmit = &sport->port.state->xmit;
-       unsigned long count =  CIRC_CNT_TO_END(xmit->head,
-                                       xmit->tail, UART_XMIT_SIZE);
-
-       if (!count)
-               return;
-
-       if (count < sport->txfifo_size)
-               writeb(readb(sport->port.membase + UARTCR5) & ~UARTCR5_TDMAS,
-                               sport->port.membase + UARTCR5);
-       else {
-               writeb(readb(sport->port.membase + UARTCR5) | UARTCR5_TDMAS,
-                               sport->port.membase + UARTCR5);
-               lpuart_dma_tx(sport, count);
-       }
 }
 
 static void lpuart_dma_tx_complete(void *arg)
 {
        struct lpuart_port *sport = arg;
+       struct scatterlist *sgl = &sport->tx_sgl[0];
        struct circ_buf *xmit = &sport->port.state->xmit;
        unsigned long flags;
 
-       async_tx_ack(sport->dma_tx_desc);
-
        spin_lock_irqsave(&sport->port.lock, flags);
 
+       dma_unmap_sg(sport->port.dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE);
+
        xmit->tail = (xmit->tail + sport->dma_tx_bytes) & (UART_XMIT_SIZE - 1);
-       sport->dma_tx_in_progress = 0;
+
+       sport->port.icount.tx += sport->dma_tx_bytes;
+       sport->dma_tx_in_progress = false;
+       spin_unlock_irqrestore(&sport->port.lock, flags);
 
        if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
                uart_write_wakeup(&sport->port);
 
-       lpuart_prepare_tx(sport);
+       if (waitqueue_active(&sport->dma_wait)) {
+               wake_up(&sport->dma_wait);
+               return;
+       }
+
+       spin_lock_irqsave(&sport->port.lock, flags);
+
+       if (!uart_circ_empty(xmit) && !uart_tx_stopped(&sport->port))
+               lpuart_dma_tx(sport);
 
        spin_unlock_irqrestore(&sport->port.lock, flags);
 }
 
-static int lpuart_dma_rx(struct lpuart_port *sport)
+static int lpuart_dma_tx_request(struct uart_port *port)
 {
-       dma_sync_single_for_device(sport->port.dev, sport->dma_rx_buf_bus,
-                       FSL_UART_RX_DMA_BUFFER_SIZE, DMA_TO_DEVICE);
-       sport->dma_rx_desc = dmaengine_prep_slave_single(sport->dma_rx_chan,
-                       sport->dma_rx_buf_bus, FSL_UART_RX_DMA_BUFFER_SIZE,
-                       DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
+       struct lpuart_port *sport = container_of(port,
+                                       struct lpuart_port, port);
+       struct dma_slave_config dma_tx_sconfig = {};
+       int ret;
 
-       if (!sport->dma_rx_desc) {
-               dev_err(sport->port.dev, "Not able to get desc for rx\n");
-               return -EIO;
-       }
+       dma_tx_sconfig.dst_addr = sport->port.mapbase + UARTDR;
+       dma_tx_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+       dma_tx_sconfig.dst_maxburst = 1;
+       dma_tx_sconfig.direction = DMA_MEM_TO_DEV;
+       ret = dmaengine_slave_config(sport->dma_tx_chan, &dma_tx_sconfig);
 
-       sport->dma_rx_desc->callback = lpuart_dma_rx_complete;
-       sport->dma_rx_desc->callback_param = sport;
-       sport->dma_rx_in_progress = 1;
-       sport->dma_rx_cookie = dmaengine_submit(sport->dma_rx_desc);
-       dma_async_issue_pending(sport->dma_rx_chan);
+       if (ret) {
+               dev_err(sport->port.dev,
+                               "DMA slave config failed, err = %d\n", ret);
+               return ret;
+       }
 
        return 0;
 }
@@ -458,75 +420,17 @@ static int lpuart_dma_rx(struct lpuart_port *sport)
 static void lpuart_flush_buffer(struct uart_port *port)
 {
        struct lpuart_port *sport = container_of(port, struct lpuart_port, port);
+
        if (sport->lpuart_dma_tx_use) {
+               if (sport->dma_tx_in_progress) {
+                       dma_unmap_sg(sport->port.dev, &sport->tx_sgl[0],
+                               sport->dma_tx_nents, DMA_TO_DEVICE);
+                       sport->dma_tx_in_progress = false;
+               }
                dmaengine_terminate_all(sport->dma_tx_chan);
-               sport->dma_tx_in_progress = 0;
        }
 }
 
-static void lpuart_dma_rx_complete(void *arg)
-{
-       struct lpuart_port *sport = arg;
-       struct tty_port *port = &sport->port.state->port;
-       unsigned long flags;
-
-       async_tx_ack(sport->dma_rx_desc);
-       mod_timer(&sport->lpuart_timer, jiffies + sport->dma_rx_timeout);
-
-       spin_lock_irqsave(&sport->port.lock, flags);
-
-       sport->dma_rx_in_progress = 0;
-       lpuart_copy_rx_to_tty(sport, port, FSL_UART_RX_DMA_BUFFER_SIZE);
-       tty_flip_buffer_push(port);
-       lpuart_dma_rx(sport);
-
-       spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
-static void lpuart_timer_func(unsigned long data)
-{
-       struct lpuart_port *sport = (struct lpuart_port *)data;
-       struct tty_port *port = &sport->port.state->port;
-       struct dma_tx_state state;
-       unsigned long flags;
-       unsigned char temp;
-       int count;
-
-       del_timer(&sport->lpuart_timer);
-       dmaengine_pause(sport->dma_rx_chan);
-       dmaengine_tx_status(sport->dma_rx_chan, sport->dma_rx_cookie, &state);
-       dmaengine_terminate_all(sport->dma_rx_chan);
-       count = FSL_UART_RX_DMA_BUFFER_SIZE - state.residue;
-       async_tx_ack(sport->dma_rx_desc);
-
-       spin_lock_irqsave(&sport->port.lock, flags);
-
-       sport->dma_rx_in_progress = 0;
-       lpuart_copy_rx_to_tty(sport, port, count);
-       tty_flip_buffer_push(port);
-       temp = readb(sport->port.membase + UARTCR5);
-       writeb(temp & ~UARTCR5_RDMAS, sport->port.membase + UARTCR5);
-
-       spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
-static inline void lpuart_prepare_rx(struct lpuart_port *sport)
-{
-       unsigned long flags;
-       unsigned char temp;
-
-       spin_lock_irqsave(&sport->port.lock, flags);
-
-       sport->lpuart_timer.expires = jiffies + sport->dma_rx_timeout;
-       add_timer(&sport->lpuart_timer);
-
-       lpuart_dma_rx(sport);
-       temp = readb(sport->port.membase + UARTCR5);
-       writeb(temp | UARTCR5_RDMAS, sport->port.membase + UARTCR5);
-
-       spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
 static inline void lpuart_transmit_buffer(struct lpuart_port *sport)
 {
        struct circ_buf *xmit = &sport->port.state->xmit;
@@ -580,8 +484,8 @@ static void lpuart_start_tx(struct uart_port *port)
        writeb(temp | UARTCR2_TIE, port->membase + UARTCR2);
 
        if (sport->lpuart_dma_tx_use) {
-               if (!uart_circ_empty(xmit) && !sport->dma_tx_in_progress)
-                       lpuart_prepare_tx(sport);
+               if (!uart_circ_empty(xmit) && !uart_tx_stopped(port))
+                       lpuart_dma_tx(sport);
        } else {
                if (readb(port->membase + UARTSR1) & UARTSR1_TDRE)
                        lpuart_transmit_buffer(sport);
@@ -600,6 +504,29 @@ static void lpuart32_start_tx(struct uart_port *port)
                lpuart32_transmit_buffer(sport);
 }
 
+/* return TIOCSER_TEMT when transmitter is not busy */
+static unsigned int lpuart_tx_empty(struct uart_port *port)
+{
+       struct lpuart_port *sport = container_of(port,
+                       struct lpuart_port, port);
+       unsigned char sr1 = readb(port->membase + UARTSR1);
+       unsigned char sfifo = readb(port->membase + UARTSFIFO);
+
+       if (sport->dma_tx_in_progress)
+               return 0;
+
+       if (sr1 & UARTSR1_TC && sfifo & UARTSFIFO_TXEMPT)
+               return TIOCSER_TEMT;
+
+       return 0;
+}
+
+static unsigned int lpuart32_tx_empty(struct uart_port *port)
+{
+       return (lpuart32_read(port->membase + UARTSTAT) & UARTSTAT_TC) ?
+               TIOCSER_TEMT : 0;
+}
+
 static irqreturn_t lpuart_txint(int irq, void *dev_id)
 {
        struct lpuart_port *sport = dev_id;
@@ -766,23 +693,15 @@ out:
 static irqreturn_t lpuart_int(int irq, void *dev_id)
 {
        struct lpuart_port *sport = dev_id;
-       unsigned char sts, crdma;
+       unsigned char sts;
 
        sts = readb(sport->port.membase + UARTSR1);
-       crdma = readb(sport->port.membase + UARTCR5);
 
-       if (sts & UARTSR1_RDRF && !(crdma & UARTCR5_RDMAS)) {
-               if (sport->lpuart_dma_rx_use)
-                       lpuart_prepare_rx(sport);
-               else
-                       lpuart_rxint(irq, dev_id);
-       }
-       if (sts & UARTSR1_TDRE && !(crdma & UARTCR5_TDMAS)) {
-               if (sport->lpuart_dma_tx_use)
-                       lpuart_pio_tx(sport);
-               else
-                       lpuart_txint(irq, dev_id);
-       }
+       if (sts & UARTSR1_RDRF)
+               lpuart_rxint(irq, dev_id);
+
+       if (sts & UARTSR1_TDRE)
+               lpuart_txint(irq, dev_id);
 
        return IRQ_HANDLED;
 }
@@ -807,17 +726,195 @@ static irqreturn_t lpuart32_int(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-/* return TIOCSER_TEMT when transmitter is not busy */
-static unsigned int lpuart_tx_empty(struct uart_port *port)
+static void lpuart_copy_rx_to_tty(struct lpuart_port *sport)
 {
-       return (readb(port->membase + UARTSR1) & UARTSR1_TC) ?
-               TIOCSER_TEMT : 0;
+       struct tty_port *port = &sport->port.state->port;
+       struct dma_tx_state state;
+       enum dma_status dmastat;
+       struct circ_buf *ring = &sport->rx_ring;
+       unsigned long flags;
+       int count = 0;
+       unsigned char sr;
+
+       sr = readb(sport->port.membase + UARTSR1);
+
+       if (sr & (UARTSR1_PE | UARTSR1_FE)) {
+               /* Read DR to clear the error flags */
+               readb(sport->port.membase + UARTDR);
+
+               if (sr & UARTSR1_PE)
+                       sport->port.icount.parity++;
+               else if (sr & UARTSR1_FE)
+                       sport->port.icount.frame++;
+       }
+
+       async_tx_ack(sport->dma_rx_desc);
+
+       spin_lock_irqsave(&sport->port.lock, flags);
+
+       dmastat = dmaengine_tx_status(sport->dma_rx_chan,
+                               sport->dma_rx_cookie,
+                               &state);
+
+       if (dmastat == DMA_ERROR) {
+               dev_err(sport->port.dev, "Rx DMA transfer failed!\n");
+               spin_unlock_irqrestore(&sport->port.lock, flags);
+               return;
+       }
+
+       /* CPU claims ownership of RX DMA buffer */
+       dma_sync_sg_for_cpu(sport->port.dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
+
+       /*
+        * ring->head points to the end of data already written by the DMA.
+        * ring->tail points to the beginning of data to be read by the
+        * framework.
+        * The current transfer size should not be larger than the dma buffer
+        * length.
+        */
+       ring->head = sport->rx_sgl.length - state.residue;
+       BUG_ON(ring->head > sport->rx_sgl.length);
+       /*
+        * At this point ring->head may point to the first byte right after the
+        * last byte of the dma buffer:
+        * 0 <= ring->head <= sport->rx_sgl.length
+        *
+        * However ring->tail must always points inside the dma buffer:
+        * 0 <= ring->tail <= sport->rx_sgl.length - 1
+        *
+        * Since we use a ring buffer, we have to handle the case
+        * where head is lower than tail. In such a case, we first read from
+        * tail to the end of the buffer then reset tail.
+        */
+       if (ring->head < ring->tail) {
+               count = sport->rx_sgl.length - ring->tail;
+
+               tty_insert_flip_string(port, ring->buf + ring->tail, count);
+               ring->tail = 0;
+               sport->port.icount.rx += count;
+       }
+
+       /* Finally we read data from tail to head */
+       if (ring->tail < ring->head) {
+               count = ring->head - ring->tail;
+               tty_insert_flip_string(port, ring->buf + ring->tail, count);
+               /* Wrap ring->head if needed */
+               if (ring->head >= sport->rx_sgl.length)
+                       ring->head = 0;
+               ring->tail = ring->head;
+               sport->port.icount.rx += count;
+       }
+
+       dma_sync_sg_for_device(sport->port.dev, &sport->rx_sgl, 1,
+                              DMA_FROM_DEVICE);
+
+       spin_unlock_irqrestore(&sport->port.lock, flags);
+
+       tty_flip_buffer_push(port);
+       mod_timer(&sport->lpuart_timer, jiffies + sport->dma_rx_timeout);
 }
 
-static unsigned int lpuart32_tx_empty(struct uart_port *port)
+static void lpuart_dma_rx_complete(void *arg)
 {
-       return (lpuart32_read(port->membase + UARTSTAT) & UARTSTAT_TC) ?
-               TIOCSER_TEMT : 0;
+       struct lpuart_port *sport = arg;
+
+       lpuart_copy_rx_to_tty(sport);
+}
+
+static void lpuart_timer_func(unsigned long data)
+{
+       struct lpuart_port *sport = (struct lpuart_port *)data;
+
+       lpuart_copy_rx_to_tty(sport);
+}
+
+static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
+{
+       struct dma_slave_config dma_rx_sconfig = {};
+       struct circ_buf *ring = &sport->rx_ring;
+       int ret, nent;
+       int bits, baud;
+       struct tty_struct *tty = tty_port_tty_get(&sport->port.state->port);
+       struct ktermios *termios;
+
+       /* The tty may already be gone (port closing); nothing to set up. */
+       if (!tty)
+               return -ENODEV;
+
+       termios = &tty->termios;
+       baud = tty_get_baud_rate(tty);
+
+       bits = (termios->c_cflag & CSIZE) == CS7 ? 9 : 10;
+       if (termios->c_cflag & PARENB)
+               bits++;
+
+       /* Drop the reference taken by tty_port_tty_get(). */
+       tty_kref_put(tty);
+
+       /*
+        * Calculate length of one DMA buffer size to keep latency below
+        * 10ms at any baud rate.
+        */
+       sport->rx_dma_rng_buf_len = (DMA_RX_TIMEOUT * baud /  bits / 1000) * 2;
+       sport->rx_dma_rng_buf_len = (1 << (fls(sport->rx_dma_rng_buf_len) - 1));
+       if (sport->rx_dma_rng_buf_len < 16)
+               sport->rx_dma_rng_buf_len = 16;
+
+       ring->buf = kmalloc(sport->rx_dma_rng_buf_len, GFP_KERNEL);
+       if (!ring->buf) {
+               dev_err(sport->port.dev, "Ring buf alloc failed\n");
+               return -ENOMEM;
+       }
+
+       /* sg_init_one() already points the entry at ring->buf. */
+       sg_init_one(&sport->rx_sgl, ring->buf, sport->rx_dma_rng_buf_len);
+       nent = dma_map_sg(sport->port.dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
+
+       if (!nent) {
+               dev_err(sport->port.dev, "DMA Rx mapping error\n");
+               ret = -EINVAL;
+               goto err_free_ring;
+       }
+
+       dma_rx_sconfig.src_addr = sport->port.mapbase + UARTDR;
+       dma_rx_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+       dma_rx_sconfig.src_maxburst = 1;
+       dma_rx_sconfig.direction = DMA_DEV_TO_MEM;
+       ret = dmaengine_slave_config(sport->dma_rx_chan, &dma_rx_sconfig);
+
+       if (ret < 0) {
+               dev_err(sport->port.dev,
+                               "DMA Rx slave config failed, err = %d\n", ret);
+               goto err_unmap;
+       }
+
+       sport->dma_rx_desc = dmaengine_prep_dma_cyclic(sport->dma_rx_chan,
+                                sg_dma_address(&sport->rx_sgl),
+                                sport->rx_sgl.length,
+                                sport->rx_sgl.length / 2,
+                                DMA_DEV_TO_MEM,
+                                DMA_PREP_INTERRUPT);
+       if (!sport->dma_rx_desc) {
+               dev_err(sport->port.dev, "Cannot prepare cyclic DMA\n");
+               ret = -EFAULT;
+               goto err_unmap;
+       }
+
+       sport->dma_rx_desc->callback = lpuart_dma_rx_complete;
+       sport->dma_rx_desc->callback_param = sport;
+       sport->dma_rx_cookie = dmaengine_submit(sport->dma_rx_desc);
+       dma_async_issue_pending(sport->dma_rx_chan);
+
+       writeb(readb(sport->port.membase + UARTCR5) | UARTCR5_RDMAS,
+                               sport->port.membase + UARTCR5);
+
+       return 0;
+
+err_unmap:
+       dma_unmap_sg(sport->port.dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
+err_free_ring:
+       kfree(ring->buf);
+       ring->buf = NULL;
+       return ret;
+}
+
+static void lpuart_dma_rx_free(struct uart_port *port)
+{
+       struct lpuart_port *sport = container_of(port,
+                                       struct lpuart_port, port);
+
+       if (sport->dma_rx_chan)
+               dmaengine_terminate_all(sport->dma_rx_chan);
+
+       dma_unmap_sg(sport->port.dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
+       kfree(sport->rx_ring.buf);
+       sport->rx_ring.tail = 0;
+       sport->rx_ring.head = 0;
+       sport->dma_rx_desc = NULL;
+       sport->dma_rx_cookie = -EINVAL;
 }
 
 static unsigned int lpuart_get_mctrl(struct uart_port *port)
@@ -921,13 +1018,16 @@ static void lpuart_setup_watermark(struct lpuart_port *sport)
        writeb(val | UARTPFIFO_TXFE | UARTPFIFO_RXFE,
                        sport->port.membase + UARTPFIFO);
 
-       /* explicitly clear RDRF */
-       readb(sport->port.membase + UARTSR1);
-
        /* flush Tx and Rx FIFO */
        writeb(UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH,
                        sport->port.membase + UARTCFIFO);
 
+       /* explicitly clear RDRF */
+       if (readb(sport->port.membase + UARTSR1) & UARTSR1_RDRF) {
+               readb(sport->port.membase + UARTDR);
+               writeb(UARTSFIFO_RXUF, sport->port.membase + UARTSFIFO);
+       }
+
        writeb(0, sport->port.membase + UARTTWFIFO);
        writeb(1, sport->port.membase + UARTRWFIFO);
 
@@ -960,110 +1060,12 @@ static void lpuart32_setup_watermark(struct lpuart_port *sport)
        lpuart32_write(ctrl_saved, sport->port.membase + UARTCTRL);
 }
 
-static int lpuart_dma_tx_request(struct uart_port *port)
-{
-       struct lpuart_port *sport = container_of(port,
-                                       struct lpuart_port, port);
-       struct dma_slave_config dma_tx_sconfig;
-       dma_addr_t dma_bus;
-       unsigned char *dma_buf;
-       int ret;
-
-       dma_bus = dma_map_single(sport->dma_tx_chan->device->dev,
-                               sport->port.state->xmit.buf,
-                               UART_XMIT_SIZE, DMA_TO_DEVICE);
-
-       if (dma_mapping_error(sport->dma_tx_chan->device->dev, dma_bus)) {
-               dev_err(sport->port.dev, "dma_map_single tx failed\n");
-               return -ENOMEM;
-       }
-
-       dma_buf = sport->port.state->xmit.buf;
-       dma_tx_sconfig.dst_addr = sport->port.mapbase + UARTDR;
-       dma_tx_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
-       dma_tx_sconfig.dst_maxburst = sport->txfifo_size;
-       dma_tx_sconfig.direction = DMA_MEM_TO_DEV;
-       ret = dmaengine_slave_config(sport->dma_tx_chan, &dma_tx_sconfig);
-
-       if (ret < 0) {
-               dev_err(sport->port.dev,
-                               "Dma slave config failed, err = %d\n", ret);
-               return ret;
-       }
-
-       sport->dma_tx_buf_virt = dma_buf;
-       sport->dma_tx_buf_bus = dma_bus;
-       sport->dma_tx_in_progress = 0;
-
-       return 0;
-}
-
-static int lpuart_dma_rx_request(struct uart_port *port)
-{
-       struct lpuart_port *sport = container_of(port,
-                                       struct lpuart_port, port);
-       struct dma_slave_config dma_rx_sconfig;
-       dma_addr_t dma_bus;
-       unsigned char *dma_buf;
-       int ret;
-
-       dma_buf = devm_kzalloc(sport->port.dev,
-                               FSL_UART_RX_DMA_BUFFER_SIZE, GFP_KERNEL);
-
-       if (!dma_buf) {
-               dev_err(sport->port.dev, "Dma rx alloc failed\n");
-               return -ENOMEM;
-       }
-
-       dma_bus = dma_map_single(sport->dma_rx_chan->device->dev, dma_buf,
-                               FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
-
-       if (dma_mapping_error(sport->dma_rx_chan->device->dev, dma_bus)) {
-               dev_err(sport->port.dev, "dma_map_single rx failed\n");
-               return -ENOMEM;
-       }
-
-       dma_rx_sconfig.src_addr = sport->port.mapbase + UARTDR;
-       dma_rx_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
-       dma_rx_sconfig.src_maxburst = 1;
-       dma_rx_sconfig.direction = DMA_DEV_TO_MEM;
-       ret = dmaengine_slave_config(sport->dma_rx_chan, &dma_rx_sconfig);
-
-       if (ret < 0) {
-               dev_err(sport->port.dev,
-                               "Dma slave config failed, err = %d\n", ret);
-               return ret;
-       }
-
-       sport->dma_rx_buf_virt = dma_buf;
-       sport->dma_rx_buf_bus = dma_bus;
-       sport->dma_rx_in_progress = 0;
-
-       return 0;
-}
-
-static void lpuart_dma_tx_free(struct uart_port *port)
+static void rx_dma_timer_init(struct lpuart_port *sport)
 {
-       struct lpuart_port *sport = container_of(port,
-                                       struct lpuart_port, port);
-
-       dma_unmap_single(sport->port.dev, sport->dma_tx_buf_bus,
-                       UART_XMIT_SIZE, DMA_TO_DEVICE);
-
-       sport->dma_tx_buf_bus = 0;
-       sport->dma_tx_buf_virt = NULL;
-}
-
-static void lpuart_dma_rx_free(struct uart_port *port)
-{
-       struct lpuart_port *sport = container_of(port,
-                                       struct lpuart_port, port);
-
-       dma_unmap_single(sport->port.dev, sport->dma_rx_buf_bus,
-                       FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
-
-       sport->dma_rx_buf_bus = 0;
-       sport->dma_rx_buf_virt = NULL;
+       setup_timer(&sport->lpuart_timer, lpuart_timer_func,
+                       (unsigned long)sport);
+       sport->lpuart_timer.expires = jiffies + sport->dma_rx_timeout;
+       add_timer(&sport->lpuart_timer);
 }
 
 static int lpuart_startup(struct uart_port *port)
@@ -1084,22 +1086,6 @@ static int lpuart_startup(struct uart_port *port)
        sport->rxfifo_size = 0x1 << (((temp >> UARTPFIFO_RXSIZE_OFF) &
                UARTPFIFO_FIFOSIZE_MASK) + 1);
 
-       if (sport->dma_rx_chan && !lpuart_dma_rx_request(port)) {
-               sport->lpuart_dma_rx_use = true;
-               setup_timer(&sport->lpuart_timer, lpuart_timer_func,
-                           (unsigned long)sport);
-       } else
-               sport->lpuart_dma_rx_use = false;
-
-
-       if (sport->dma_tx_chan && !lpuart_dma_tx_request(port)) {
-               sport->lpuart_dma_tx_use = true;
-               temp = readb(port->membase + UARTCR5);
-               temp &= ~UARTCR5_RDMAS;
-               writeb(temp | UARTCR5_TDMAS, port->membase + UARTCR5);
-       } else
-               sport->lpuart_dma_tx_use = false;
-
        ret = devm_request_irq(port->dev, port->irq, lpuart_int, 0,
                                DRIVER_NAME, sport);
        if (ret)
@@ -1113,7 +1099,29 @@ static int lpuart_startup(struct uart_port *port)
        temp |= (UARTCR2_RIE | UARTCR2_TIE | UARTCR2_RE | UARTCR2_TE);
        writeb(temp, sport->port.membase + UARTCR2);
 
+       if (sport->dma_rx_chan && !lpuart_start_rx_dma(sport)) {
+               /* set Rx DMA timeout */
+               sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
+               if (!sport->dma_rx_timeout)
+                       sport->dma_rx_timeout = 1;
+
+               sport->lpuart_dma_rx_use = true;
+               rx_dma_timer_init(sport);
+       } else {
+               sport->lpuart_dma_rx_use = false;
+       }
+
+       if (sport->dma_tx_chan && !lpuart_dma_tx_request(port)) {
+               init_waitqueue_head(&sport->dma_wait);
+               sport->lpuart_dma_tx_use = true;
+               temp = readb(port->membase + UARTCR5);
+               writeb(temp | UARTCR5_TDMAS, port->membase + UARTCR5);
+       } else {
+               sport->lpuart_dma_tx_use = false;
+       }
+
        spin_unlock_irqrestore(&sport->port.lock, flags);
+
        return 0;
 }
 
@@ -1170,12 +1178,19 @@ static void lpuart_shutdown(struct uart_port *port)
        devm_free_irq(port->dev, port->irq, sport);
 
        if (sport->lpuart_dma_rx_use) {
-               lpuart_dma_rx_free(&sport->port);
                del_timer_sync(&sport->lpuart_timer);
+               lpuart_dma_rx_free(&sport->port);
        }
 
-       if (sport->lpuart_dma_tx_use)
-               lpuart_dma_tx_free(&sport->port);
+       if (sport->lpuart_dma_tx_use) {
+               if (wait_event_interruptible(sport->dma_wait,
+                       !sport->dma_tx_in_progress) != false) {
+                       sport->dma_tx_in_progress = false;
+                       dmaengine_terminate_all(sport->dma_tx_chan);
+               }
+
+               lpuart_stop_tx(port);
+       }
 }
 
 static void lpuart32_shutdown(struct uart_port *port)
@@ -1203,13 +1218,14 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios,
 {
        struct lpuart_port *sport = container_of(port, struct lpuart_port, port);
        unsigned long flags;
-       unsigned char cr1, old_cr1, old_cr2, cr4, bdh, modem;
+       unsigned char cr1, old_cr1, old_cr2, cr3, cr4, bdh, modem;
        unsigned int  baud;
        unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8;
        unsigned int sbr, brfa;
 
        cr1 = old_cr1 = readb(sport->port.membase + UARTCR1);
        old_cr2 = readb(sport->port.membase + UARTCR2);
+       cr3 = readb(sport->port.membase + UARTCR3);
        cr4 = readb(sport->port.membase + UARTCR4);
        bdh = readb(sport->port.membase + UARTBDH);
        modem = readb(sport->port.membase + UARTMODEM);
@@ -1257,7 +1273,10 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios,
        if ((termios->c_cflag & PARENB)) {
                if (termios->c_cflag & CMSPAR) {
                        cr1 &= ~UARTCR1_PE;
-                       cr1 |= UARTCR1_M;
+                       if (termios->c_cflag & PARODD)
+                               cr3 |= UARTCR3_T8;
+                       else
+                               cr3 &= ~UARTCR3_T8;
                } else {
                        cr1 |= UARTCR1_PE;
                        if ((termios->c_cflag & CSIZE) == CS8)
@@ -1297,17 +1316,6 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios,
        /* update the per-port timeout */
        uart_update_timeout(port, termios->c_cflag, baud);
 
-       if (sport->lpuart_dma_rx_use) {
-               /* Calculate delay for 1.5 DMA buffers */
-               sport->dma_rx_timeout = (sport->port.timeout - HZ / 50) *
-                                       FSL_UART_RX_DMA_BUFFER_SIZE * 3 /
-                                       sport->rxfifo_size / 2;
-               dev_dbg(port->dev, "DMA Rx t-out %ums, tty t-out %u jiffies\n",
-                       sport->dma_rx_timeout * 1000 / HZ, sport->port.timeout);
-               if (sport->dma_rx_timeout < msecs_to_jiffies(20))
-                       sport->dma_rx_timeout = msecs_to_jiffies(20);
-       }
-
        /* wait transmit engin complete */
        while (!(readb(sport->port.membase + UARTSR1) & UARTSR1_TC))
                barrier();
@@ -1325,12 +1333,31 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios,
        writeb(cr4 | brfa, sport->port.membase + UARTCR4);
        writeb(bdh, sport->port.membase + UARTBDH);
        writeb(sbr & 0xFF, sport->port.membase + UARTBDL);
+       writeb(cr3, sport->port.membase + UARTCR3);
        writeb(cr1, sport->port.membase + UARTCR1);
        writeb(modem, sport->port.membase + UARTMODEM);
 
        /* restore control register */
        writeb(old_cr2, sport->port.membase + UARTCR2);
 
+       /*
+        * If new baud rate is set, we will also need to update the Ring buffer
+        * length according to the selected baud rate and restart Rx DMA path.
+        */
+       if (old) {
+               if (sport->lpuart_dma_rx_use) {
+                       del_timer_sync(&sport->lpuart_timer);
+                       lpuart_dma_rx_free(&sport->port);
+               }
+
+               if (sport->dma_rx_chan && !lpuart_start_rx_dma(sport)) {
+                       sport->lpuart_dma_rx_use = true;
+                       rx_dma_timer_init(sport);
+               } else {
+                       sport->lpuart_dma_rx_use = false;
+               }
+       }
+
        spin_unlock_irqrestore(&sport->port.lock, flags);
 }
 
@@ -1922,6 +1949,8 @@ static int lpuart_suspend(struct device *dev)
        }
 
        uart_suspend_port(&lpuart_reg, &sport->port);
+       if (sport->port.suspended && !sport->port.irq_wake)
+               clk_disable_unprepare(sport->clk);
 
        return 0;
 }
@@ -1931,6 +1960,9 @@ static int lpuart_resume(struct device *dev)
        struct lpuart_port *sport = dev_get_drvdata(dev);
        unsigned long temp;
 
+       if (sport->port.suspended && !sport->port.irq_wake)
+               clk_prepare_enable(sport->clk);
+
        if (sport->lpuart32) {
                lpuart32_setup_watermark(sport);
                temp = lpuart32_read(sport->port.membase + UARTCTRL);