net: thunderx: Cleanup receive buffer allocation
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index f13289f0d2386d09e348b13c310346d42e20b74d..e4a02a96d4f05baf3df2c08bcafc14861b617fbd 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -19,8 +19,6 @@
 #include "q_struct.h"
 #include "nicvf_queues.h"
 
-#define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ?  PAGE_ALLOC_COSTLY_ORDER : 0)
-
 static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr)
 {
        /* Translation is installed only when IOMMU is present */
@@ -90,42 +88,97 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
        dmem->base = NULL;
 }
 
-/* Allocate buffer for packet reception
- * HW returns memory address where packet is DMA'ed but not a pointer
- * into RBDR ring, so save buffer address at the start of fragment and
- * align the start address to a cache aligned address
+/* Allocate a new page or recycle one if possible
+ *
+ * We cannot optimize the DMA mapping here, since
+ * 1. There is only one RBDR ring for all 8 Rx queues.
+ * 2. CQE_RX gives the address of the buffer where the pkt has been
+ *    DMA'ed, not an index into the RBDR ring, so saved mapping info
+ *    can't be looked up.
+ * 3. There are multiple receive buffers per page.
  */
-static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
-                                        u32 buf_len, u64 **rbuf)
+static struct pgcache *nicvf_alloc_page(struct nicvf *nic,
+                                       struct rbdr *rbdr, gfp_t gfp)
 {
-       int order = NICVF_PAGE_ORDER;
+       struct page *page = NULL;
+       struct pgcache *pgcache, *next;
+
+       /* Check if page is already allocated */
+       pgcache = &rbdr->pgcache[rbdr->pgidx];
+       page = pgcache->page;
+       /* Check if page can be recycled */
+       if (page && (page_ref_count(page) != 1))
+               page = NULL;
+
+       if (!page) {
+               page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0);
+               if (!page)
+                       return NULL;
+
+               this_cpu_inc(nic->pnicvf->drv_stats->page_alloc);
+
+               /* Check for space */
+               if (rbdr->pgalloc >= rbdr->pgcnt) {
+                       /* Page can still be used */
+                       nic->rb_page = page;
+                       return NULL;
+               }
+
+               /* Save the page in page cache */
+               pgcache->page = page;
+               rbdr->pgalloc++;
+       }
+
+       /* Take extra page reference for recycling */
+       page_ref_add(page, 1);
+
+       rbdr->pgidx++;
+       rbdr->pgidx &= (rbdr->pgcnt - 1);
+
+       /* Prefetch refcount of next page in page cache */
+       next = &rbdr->pgcache[rbdr->pgidx];
+       page = next->page;
+       if (page)
+               prefetch(&page->_refcount);
+
+       return pgcache;
+}
+
+/* Allocate buffer for packet reception */
+static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
+                                        gfp_t gfp, u32 buf_len, u64 *rbuf)
+{
+       struct pgcache *pgcache = NULL;
 
        /* Check if request can be accommodated in the previously allocated page */
        if (nic->rb_page &&
-           ((nic->rb_page_offset + buf_len) < (PAGE_SIZE << order))) {
+           ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) {
                nic->rb_pageref++;
                goto ret;
        }
 
        nicvf_get_page(nic);
+       nic->rb_page = NULL;
 
-       /* Allocate a new page */
-       nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
-                                  order);
-       if (!nic->rb_page) {
+       /* Get a page: either a recycled one or a newly allocated one */
+       pgcache = nicvf_alloc_page(nic, rbdr, gfp);
+       if (!pgcache && !nic->rb_page) {
                this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
                return -ENOMEM;
        }
+
        nic->rb_page_offset = 0;
+       /* Check if it's recycled */
+       if (pgcache)
+               nic->rb_page = pgcache->page;
 ret:
        /* HW will ensure data coherency, CPU sync not required */
-       *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
-                                               nic->rb_page_offset, buf_len,
-                                               DMA_FROM_DEVICE,
-                                               DMA_ATTR_SKIP_CPU_SYNC));
+       *rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
+                                       nic->rb_page_offset, buf_len,
+                                       DMA_FROM_DEVICE,
+                                       DMA_ATTR_SKIP_CPU_SYNC);
        if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
                if (!nic->rb_page_offset)
-                       __free_pages(nic->rb_page, order);
+                       __free_pages(nic->rb_page, 0);
                nic->rb_page = NULL;
                return -ENOMEM;
        }
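
The recycle test above hinges on the page refcount: the cache keeps a base reference on each page it stores, page_ref_add() pins the page with an extra reference each time a buffer from it is handed out, and nicvf_alloc_page() treats the page as reusable only when page_ref_count() has fallen back to 1, i.e. the stack has released every buffer carved out of it. Below is a minimal userspace sketch of that decision using C11 atomics in place of struct page's _refcount; the names (cached_page, try_recycle) are illustrative, not driver symbols.

/* Minimal model of the recycle decision, assuming the cache holds one base
 * reference per page and each buffer handed to the stack carries one extra
 * reference.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct cached_page {
	void *data;
	atomic_int refcount;		/* stands in for struct page::_refcount */
};

/* Reusable only if the cache's base reference is the last one left */
static struct cached_page *try_recycle(struct cached_page *pg)
{
	if (pg && atomic_load(&pg->refcount) == 1)
		return pg;
	return NULL;			/* caller must allocate a fresh page */
}

int main(void)
{
	struct cached_page pg = { .data = malloc(4096) };

	atomic_store(&pg.refcount, 1);		/* base reference held by the cache */
	atomic_fetch_add(&pg.refcount, 1);	/* buffer handed to the stack */

	printf("while in flight: %s\n",
	       try_recycle(&pg) ? "recycle" : "allocate new page");

	atomic_fetch_sub(&pg.refcount, 1);	/* stack frees the skb */
	printf("after release:   %s\n",
	       try_recycle(&pg) ? "recycle" : "allocate new page");

	free(pg.data);
	return 0;
}
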
@@ -159,7 +212,7 @@ static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
                            int ring_len, int buf_size)
 {
        int idx;
-       u64 *rbuf;
+       u64 rbuf;
        struct rbdr_entry_t *desc;
        int err;
 
@@ -177,10 +230,26 @@ static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
        rbdr->head = 0;
        rbdr->tail = 0;
 
+       /* Initialize page recycling state.
+        *
+        * We can't use a single buffer per page, especially with 64K
+        * pages. On embedded platforms, i.e. 81xx/83xx, available memory
+        * itself is low and the minimum RBDR ring size is 8K, which would
+        * take away a lot of memory.
+        */
+       rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size);
+       rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt);
+       rbdr->pgcache = kzalloc(sizeof(*rbdr->pgcache) *
+                               rbdr->pgcnt, GFP_KERNEL);
+       if (!rbdr->pgcache)
+               return -ENOMEM;
+       rbdr->pgidx = 0;
+       rbdr->pgalloc = 0;
+
        nic->rb_page = NULL;
        for (idx = 0; idx < ring_len; idx++) {
-               err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN,
-                                            &rbuf);
+               err = nicvf_alloc_rcv_buffer(nic, rbdr, GFP_KERNEL,
+                                            RCV_FRAG_LEN, &rbuf);
                if (err) {
                        /* To free already allocated and mapped ones */
                        rbdr->tail = idx - 1;
@@ -188,7 +257,7 @@ static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
                }
 
                desc = GET_RBDR_DESC(rbdr, idx);
-               desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
+               desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
        }
 
        nicvf_get_page(nic);
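
The sizing logic added above gives the cache roughly one slot per page's worth of receive buffers and then rounds the count up to a power of two, so rbdr->pgidx can wrap with a simple mask instead of a modulo. A small sketch of that arithmetic follows; the ring length, page size and buffer size are assumed example values, not the driver's actual RBDR length or RCV_FRAG_LEN.

/* Sketch of the page-cache sizing, with assumed example values. */
#include <stdint.h>
#include <stdio.h>

/* Mirrors the effect of the kernel's roundup_pow_of_two() for 32-bit values */
static uint32_t roundup_pow2(uint32_t v)
{
	v--;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	return v + 1;
}

int main(void)
{
	uint32_t page_size = 4096;	/* assumed 4K pages */
	uint32_t ring_len  = 8192;	/* assumed RBDR ring length */
	uint32_t buf_size  = 1536;	/* assumed receive buffer size */

	uint32_t bufs_per_page = page_size / buf_size;		 /* 2 */
	uint32_t pgcnt = roundup_pow2(ring_len / bufs_per_page); /* 4096 */

	/* A power-of-two count lets the cache index wrap with a mask,
	 * just as rbdr->pgidx does in the patch.
	 */
	uint32_t pgidx = pgcnt - 1;		/* last slot */
	pgidx = (pgidx + 1) & (pgcnt - 1);	/* wraps back to 0 */

	printf("pgcnt=%u, wrapped pgidx=%u\n", pgcnt, pgidx);
	return 0;
}
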
@@ -201,6 +270,7 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
 {
        int head, tail;
        u64 buf_addr, phys_addr;
+       struct pgcache *pgcache;
        struct rbdr_entry_t *desc;
 
        if (!rbdr)
@@ -216,7 +286,7 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
        /* Release page references */
        while (head != tail) {
                desc = GET_RBDR_DESC(rbdr, head);
-               buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+               buf_addr = desc->buf_addr;
                phys_addr = nicvf_iova_to_phys(nic, buf_addr);
                dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
                                     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
@@ -227,13 +297,25 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
        }
        /* Release buffer of tail desc */
        desc = GET_RBDR_DESC(rbdr, tail);
-       buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+       buf_addr = desc->buf_addr;
        phys_addr = nicvf_iova_to_phys(nic, buf_addr);
        dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
                             DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
        if (phys_addr)
                put_page(virt_to_page(phys_to_virt(phys_addr)));
 
+       /* Sync page cache info */
+       smp_rmb();
+
+       /* Release additional page references held for recycling */
+       head = 0;
+       while (head < rbdr->pgcnt) {
+               pgcache = &rbdr->pgcache[head];
+               if (pgcache->page && page_ref_count(pgcache->page) != 0)
+                       put_page(pgcache->page);
+               head++;
+       }
+
        /* Free RBDR ring */
        nicvf_free_q_desc_mem(nic, &rbdr->dmem);
 }
@@ -248,7 +330,7 @@ static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp)
        int refill_rb_cnt;
        struct rbdr *rbdr;
        struct rbdr_entry_t *desc;
-       u64 *rbuf;
+       u64 rbuf;
        int new_rb = 0;
 
 refill:
@@ -269,17 +351,20 @@ refill:
        else
                refill_rb_cnt = qs->rbdr_len - qcount - 1;
 
+       /* Sync page cache info */
+       smp_rmb();
+
        /* Start filling descs from tail */
        tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
        while (refill_rb_cnt) {
                tail++;
                tail &= (rbdr->dmem.q_len - 1);
 
-               if (nicvf_alloc_rcv_buffer(nic, gfp, RCV_FRAG_LEN, &rbuf))
+               if (nicvf_alloc_rcv_buffer(nic, rbdr, gfp, RCV_FRAG_LEN, &rbuf))
                        break;
 
                desc = GET_RBDR_DESC(rbdr, tail);
-               desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
+               desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
                refill_rb_cnt--;
                new_rb++;
        }
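
The descriptor encoding also changes in the hunks above: the old code stored the DMA address shifted right by NICVF_RCV_BUF_ALIGN and shifted it back when unmapping, while the new code stores the address itself with the low alignment bits masked off, so the free and refill paths can use desc->buf_addr directly. Below is a sketch of the two encodings, assuming the 128-byte (2^7) alignment the mask suggests; the real constants are defined in nicvf_queues.h and the address used here is made up.

/* Illustration of the old (shifted) vs new (masked) buf_addr encodings. */
#include <inttypes.h>
#include <stdio.h>

#define RCV_BUF_ALIGN		7			/* assumed */
#define RCV_BUF_ALIGN_BYTES	(1ULL << RCV_BUF_ALIGN)

int main(void)
{
	uint64_t dma_addr = 0x840001200ULL;	/* made-up, 128-byte aligned */

	/* Old scheme: store the address shifted right, shift back on free */
	uint64_t old_desc = dma_addr >> RCV_BUF_ALIGN;
	uint64_t old_addr = old_desc << RCV_BUF_ALIGN;

	/* New scheme: store the address with the low bits masked off,
	 * so the unmap path reads desc->buf_addr as-is.
	 */
	uint64_t new_desc = dma_addr & ~(RCV_BUF_ALIGN_BYTES - 1);

	printf("old: stored %#" PRIx64 ", rebuilt %#" PRIx64 "\n",
	       old_desc, old_addr);
	printf("new: stored %#" PRIx64 "\n", new_desc);
	return 0;
}
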
@@ -1094,7 +1179,13 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
 {
        int proto;
        struct sq_hdr_subdesc *hdr;
+       union {
+               struct iphdr *v4;
+               struct ipv6hdr *v6;
+               unsigned char *hdr;
+       } ip;
 
+       ip.hdr = skb_network_header(skb);
        hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
        memset(hdr, 0, SND_QUEUE_DESC_SIZE);
        hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
@@ -1119,7 +1210,9 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
                hdr->l3_offset = skb_network_offset(skb);
                hdr->l4_offset = skb_transport_offset(skb);
 
-               proto = ip_hdr(skb)->protocol;
+               proto = (ip.v4->version == 4) ? ip.v4->protocol :
+                       ip.v6->nexthdr;
+
                switch (proto) {
                case IPPROTO_TCP:
                        hdr->csum_l4 = SEND_L4_CSUM_TCP;
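
The checksum-offload hunk above replaces the unconditional ip_hdr() read with a union over the network header, so the L4 protocol can be picked from either an IPv4 or an IPv6 header through a single pointer. The sketch below reproduces that dispatch in userspace; the trimmed ip4_min/ip6_min structs are stand-ins for the kernel's struct iphdr and struct ipv6hdr and carry only the fields the example needs.

/* Userspace sketch of the IPv4/IPv6 dispatch: the version nibble occupies
 * the first four bits of both headers, so one read selects between
 * iphdr->protocol and ipv6hdr->nexthdr.
 */
#include <stdint.h>
#include <stdio.h>

struct ip4_min {
	uint8_t  ver_ihl;	/* version (high nibble) + header length */
	uint8_t  tos;
	uint16_t tot_len;
	uint16_t id;
	uint16_t frag_off;
	uint8_t  ttl;
	uint8_t  protocol;	/* L4 protocol, e.g. 6 for TCP */
};

struct ip6_min {
	uint8_t  ver_tc;	/* version (high nibble) + traffic class bits */
	uint8_t  tc_flow[3];
	uint16_t payload_len;
	uint8_t  nexthdr;	/* L4 protocol, e.g. 17 for UDP */
	uint8_t  hop_limit;
};

static int l4_proto(const uint8_t *nh)
{
	union {
		const struct ip4_min *v4;
		const struct ip6_min *v6;
		const uint8_t *hdr;
	} ip = { .hdr = nh };

	return ((*nh >> 4) == 4) ? ip.v4->protocol : ip.v6->nexthdr;
}

int main(void)
{
	struct ip4_min v4 = { .ver_ihl = 0x45, .protocol = 6 };	/* TCP */
	struct ip6_min v6 = { .ver_tc = 0x60, .nexthdr = 17 };	/* UDP */

	printf("v4 -> %d, v6 -> %d\n",
	       l4_proto((const uint8_t *)&v4),
	       l4_proto((const uint8_t *)&v6));
	return 0;
}
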
@@ -1547,9 +1640,6 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
 /* Check for errors in the receive cmp.queue entry */
 int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 {
-       if (!cqe_rx->err_level && !cqe_rx->err_opcode)
-               return 0;
-
        if (netif_msg_rx_err(nic))
                netdev_err(nic->netdev,
                           "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n",
@@ -1638,8 +1728,6 @@ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx)
 {
        switch (cqe_tx->send_status) {
-       case CQ_TX_ERROP_GOOD:
-               return 0;
        case CQ_TX_ERROP_DESC_FAULT:
                this_cpu_inc(nic->drv_stats->tx_desc_fault);
                break;