wifi: mt76: switch to page_pool allocator
author Lorenzo Bianconi <lorenzo@kernel.org>
Tue, 17 Jan 2023 13:53:15 +0000 (14:53 +0100)
committer Felix Fietkau <nbd@nbd.name>
Fri, 3 Feb 2023 13:47:17 +0000 (14:47 +0100)
In order to reduce possible memory allocation failures due to memory
fragmentation caused by the page_frag_cache allocator, switch to the
page_pool allocator for the DMA and USB mt76 drivers.
Remove the per-rx-queue page_frag_cache.

Co-developed-by: Felix Fietkau <nbd@nbd.name>
Tested-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
drivers/net/wireless/mediatek/mt76/Kconfig
drivers/net/wireless/mediatek/mt76/dma.c
drivers/net/wireless/mediatek/mt76/mac80211.c
drivers/net/wireless/mediatek/mt76/mt76.h
drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
drivers/net/wireless/mediatek/mt76/usb.c

index d7f90a0eb21e54fae124ac6f5ee9e1dfdc56c828..18152c16c36f22f0f55f4a4cd8eace052317c686 100644 (file)
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config MT76_CORE
        tristate
+       select PAGE_POOL
 
 config MT76_LEDS
        bool
index d29cb6cda2a997deca23e63384711b490fa90df3..e8f43770a4277946c05695f95eb72981e6039e71 100644 (file)
@@ -173,7 +173,7 @@ mt76_free_pending_rxwi(struct mt76_dev *dev)
        local_bh_disable();
        while ((t = __mt76_get_rxwi(dev)) != NULL) {
                if (t->ptr)
-                       skb_free_frag(t->ptr);
+                       mt76_put_page_pool_buf(t->ptr, false);
                kfree(t);
        }
        local_bh_enable();
@@ -408,9 +408,9 @@ mt76_dma_get_buf(struct mt76_dev *dev, struct mt76_queue *q, int idx,
                if (!t)
                        return NULL;
 
-               dma_unmap_single(dev->dma_dev, t->dma_addr,
-                                SKB_WITH_OVERHEAD(q->buf_size),
-                                DMA_FROM_DEVICE);
+               dma_sync_single_for_cpu(dev->dma_dev, t->dma_addr,
+                               SKB_WITH_OVERHEAD(q->buf_size),
+                               page_pool_get_dma_dir(q->page_pool));
 
                buf = t->ptr;
                t->dma_addr = 0;
@@ -427,9 +427,9 @@ mt76_dma_get_buf(struct mt76_dev *dev, struct mt76_queue *q, int idx,
        } else {
                buf = e->buf;
                e->buf = NULL;
-               dma_unmap_single(dev->dma_dev, e->dma_addr[0],
-                                SKB_WITH_OVERHEAD(q->buf_size),
-                                DMA_FROM_DEVICE);
+               dma_sync_single_for_cpu(dev->dma_dev, e->dma_addr[0],
+                               SKB_WITH_OVERHEAD(q->buf_size),
+                               page_pool_get_dma_dir(q->page_pool));
        }
 
        return buf;
@@ -581,11 +581,11 @@ free_skb:
 }
 
 static int
-mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
+mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q,
+                bool allow_direct)
 {
        int len = SKB_WITH_OVERHEAD(q->buf_size);
-       int frames = 0, offset = q->buf_offset;
-       dma_addr_t addr;
+       int frames = 0;
 
        if (!q->ndesc)
                return 0;
@@ -593,26 +593,25 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
        spin_lock_bh(&q->lock);
 
        while (q->queued < q->ndesc - 1) {
+               enum dma_data_direction dir;
                struct mt76_queue_buf qbuf;
-               void *buf = NULL;
+               dma_addr_t addr;
+               int offset;
+               void *buf;
 
-               buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
+               buf = mt76_get_page_pool_buf(q, &offset, q->buf_size);
                if (!buf)
                        break;
 
-               addr = dma_map_single(dev->dma_dev, buf, len, DMA_FROM_DEVICE);
-               if (unlikely(dma_mapping_error(dev->dma_dev, addr))) {
-                       skb_free_frag(buf);
-                       break;
-               }
+               addr = page_pool_get_dma_addr(virt_to_head_page(buf)) + offset;
+               dir = page_pool_get_dma_dir(q->page_pool);
+               dma_sync_single_for_device(dev->dma_dev, addr, len, dir);
 
-               qbuf.addr = addr + offset;
-               qbuf.len = len - offset;
+               qbuf.addr = addr + q->buf_offset;
+               qbuf.len = len - q->buf_offset;
                qbuf.skip_unmap = false;
                if (mt76_dma_add_rx_buf(dev, q, &qbuf, buf) < 0) {
-                       dma_unmap_single(dev->dma_dev, addr, len,
-                                        DMA_FROM_DEVICE);
-                       skb_free_frag(buf);
+                       mt76_put_page_pool_buf(buf, allow_direct);
                        break;
                }
                frames++;
@@ -653,7 +652,7 @@ mt76_dma_wed_setup(struct mt76_dev *dev, struct mt76_queue *q)
                /* WED txfree queue needs ring to be initialized before setup */
                q->flags = 0;
                mt76_dma_queue_reset(dev, q);
-               mt76_dma_rx_fill(dev, q);
+               mt76_dma_rx_fill(dev, q, false);
                q->flags = flags;
 
                ret = mtk_wed_device_txfree_ring_setup(wed, q->regs);
@@ -700,6 +699,10 @@ mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q,
        if (!q->entry)
                return -ENOMEM;
 
+       ret = mt76_create_page_pool(dev, q);
+       if (ret)
+               return ret;
+
        ret = mt76_dma_wed_setup(dev, q);
        if (ret)
                return ret;
@@ -713,7 +716,6 @@ mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q,
 static void
 mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q)
 {
-       struct page *page;
        void *buf;
        bool more;
 
@@ -727,7 +729,7 @@ mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q)
                if (!buf)
                        break;
 
-               skb_free_frag(buf);
+               mt76_put_page_pool_buf(buf, false);
        } while (1);
 
        if (q->rx_head) {
@@ -736,13 +738,6 @@ mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q)
        }
 
        spin_unlock_bh(&q->lock);
-
-       if (!q->rx_page.va)
-               return;
-
-       page = virt_to_page(q->rx_page.va);
-       __page_frag_cache_drain(page, q->rx_page.pagecnt_bias);
-       memset(&q->rx_page, 0, sizeof(q->rx_page));
 }
 
 static void
@@ -759,7 +754,7 @@ mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid)
 
        mt76_dma_rx_cleanup(dev, q);
        mt76_dma_sync_idx(dev, q);
-       mt76_dma_rx_fill(dev, q);
+       mt76_dma_rx_fill(dev, q, false);
 }
 
 static void
@@ -776,7 +771,7 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
 
                skb_add_rx_frag(skb, nr_frags, page, offset, len, q->buf_size);
        } else {
-               skb_free_frag(data);
+               mt76_put_page_pool_buf(data, true);
        }
 
        if (more)
@@ -849,6 +844,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
                        goto free_frag;
 
                skb_reserve(skb, q->buf_offset);
+               skb_mark_for_recycle(skb);
 
                *(u32 *)skb->cb = info;
 
@@ -864,10 +860,10 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
                continue;
 
 free_frag:
-               skb_free_frag(data);
+               mt76_put_page_pool_buf(data, true);
        }
 
-       mt76_dma_rx_fill(dev, q);
+       mt76_dma_rx_fill(dev, q, true);
        return done;
 }
 
@@ -910,7 +906,7 @@ mt76_dma_init(struct mt76_dev *dev,
 
        mt76_for_each_q_rx(dev, i) {
                netif_napi_add(&dev->napi_dev, &dev->napi[i], poll);
-               mt76_dma_rx_fill(dev, &dev->q_rx[i]);
+               mt76_dma_rx_fill(dev, &dev->q_rx[i], false);
                napi_enable(&dev->napi[i]);
        }
 
@@ -961,6 +957,8 @@ void mt76_dma_cleanup(struct mt76_dev *dev)
 
                netif_napi_del(&dev->napi[i]);
                mt76_dma_rx_cleanup(dev, q);
+
+               page_pool_destroy(q->page_pool);
        }
 
        mt76_free_pending_txwi(dev);
index 7fe7f68acc24bc50da59e3cb435b5df45ce8969e..dc42fff166e8de38475b67052d6b0dd26943a5fa 100644 (file)
@@ -4,6 +4,7 @@
  */
 #include <linux/sched.h>
 #include <linux/of.h>
+#include <net/page_pool.h>
 #include "mt76.h"
 
 #define CHAN2G(_idx, _freq) {                  \
@@ -556,6 +557,47 @@ void mt76_unregister_phy(struct mt76_phy *phy)
 }
 EXPORT_SYMBOL_GPL(mt76_unregister_phy);
 
+int mt76_create_page_pool(struct mt76_dev *dev, struct mt76_queue *q)
+{
+       struct page_pool_params pp_params = {
+               .order = 0,
+               .flags = PP_FLAG_PAGE_FRAG,
+               .nid = NUMA_NO_NODE,
+               .dev = dev->dma_dev,
+       };
+       int idx = q - dev->q_rx;
+
+       switch (idx) {
+       case MT_RXQ_MAIN:
+       case MT_RXQ_BAND1:
+       case MT_RXQ_BAND2:
+               pp_params.pool_size = 256;
+               break;
+       default:
+               pp_params.pool_size = 16;
+               break;
+       }
+
+       if (mt76_is_mmio(dev)) {
+               /* rely on page_pool for DMA mapping */
+               pp_params.flags |= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+               pp_params.dma_dir = DMA_FROM_DEVICE;
+               pp_params.max_len = PAGE_SIZE;
+               pp_params.offset = 0;
+       }
+
+       q->page_pool = page_pool_create(&pp_params);
+       if (IS_ERR(q->page_pool)) {
+               int err = PTR_ERR(q->page_pool);
+
+               q->page_pool = NULL;
+               return err;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mt76_create_page_pool);
+
 struct mt76_dev *
 mt76_alloc_device(struct device *pdev, unsigned int size,
                  const struct ieee80211_ops *ops,
index 4a1d8be06b0a70f5e2a1561a49ad1ee9d2472292..2ed247e7c00da0fc680282291fbc6e872903999f 100644 (file)
@@ -202,7 +202,7 @@ struct mt76_queue {
 
        dma_addr_t desc_dma;
        struct sk_buff *rx_head;
-       struct page_frag_cache rx_page;
+       struct page_pool *page_pool;
 };
 
 struct mt76_mcu_ops {
@@ -1423,6 +1423,25 @@ void __mt76_set_tx_blocked(struct mt76_dev *dev, bool blocked);
 struct mt76_txwi_cache *mt76_rx_token_release(struct mt76_dev *dev, int token);
 int mt76_rx_token_consume(struct mt76_dev *dev, void *ptr,
                          struct mt76_txwi_cache *r, dma_addr_t phys);
+int mt76_create_page_pool(struct mt76_dev *dev, struct mt76_queue *q);
+static inline void mt76_put_page_pool_buf(void *buf, bool allow_direct)
+{
+       struct page *page = virt_to_head_page(buf);
+
+       page_pool_put_full_page(page->pp, page, allow_direct);
+}
+
+static inline void *
+mt76_get_page_pool_buf(struct mt76_queue *q, u32 *offset, u32 size)
+{
+       struct page *page;
+
+       page = page_pool_dev_alloc_frag(q->page_pool, offset, size);
+       if (!page)
+               return NULL;
+
+       return page_address(page) + *offset;
+}
 
 static inline void mt76_set_tx_blocked(struct mt76_dev *dev, bool blocked)
 {
index 65b5f3c4a05ee33752f59fa719a415f1af375c44..d9cc7b6a4b57ca71f16573483fb101cd10050dba 100644 (file)
@@ -594,13 +594,9 @@ static void mt7915_mmio_wed_offload_disable(struct mtk_wed_device *wed)
 static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device *wed)
 {
        struct mt7915_dev *dev;
-       u32 length;
        int i;
 
        dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
-       length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
-                               sizeof(struct skb_shared_info));
-
        for (i = 0; i < dev->mt76.rx_token_size; i++) {
                struct mt76_txwi_cache *t;
 
@@ -608,9 +604,7 @@ static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device *wed)
                if (!t || !t->ptr)
                        continue;
 
-               dma_unmap_single(dev->mt76.dma_dev, t->dma_addr,
-                                wed->wlan.rx_size, DMA_FROM_DEVICE);
-               __free_pages(virt_to_page(t->ptr), get_order(length));
+               mt76_put_page_pool_buf(t->ptr, false);
                t->ptr = NULL;
 
                mt76_put_rxwi(&dev->mt76, t);
@@ -620,47 +614,38 @@ static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device *wed)
 static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
 {
        struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
+       struct mt76_txwi_cache *t = NULL;
        struct mt7915_dev *dev;
-       u32 length;
-       int i;
+       struct mt76_queue *q;
+       int i, len;
 
        dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
-       length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
-                               sizeof(struct skb_shared_info));
+       q = &dev->mt76.q_rx[MT_RXQ_MAIN];
+       len = SKB_WITH_OVERHEAD(q->buf_size);
 
        for (i = 0; i < size; i++) {
-               struct mt76_txwi_cache *t = mt76_get_rxwi(&dev->mt76);
-               dma_addr_t phy_addr;
-               struct page *page;
+               enum dma_data_direction dir;
+               dma_addr_t addr;
+               u32 offset;
                int token;
-               void *ptr;
+               void *buf;
 
+               t = mt76_get_rxwi(&dev->mt76);
                if (!t)
                        goto unmap;
 
-               page = __dev_alloc_pages(GFP_KERNEL, get_order(length));
-               if (!page) {
-                       mt76_put_rxwi(&dev->mt76, t);
+               buf = mt76_get_page_pool_buf(q, &offset, q->buf_size);
+               if (!buf)
                        goto unmap;
-               }
 
-               ptr = page_address(page);
-               phy_addr = dma_map_single(dev->mt76.dma_dev, ptr,
-                                         wed->wlan.rx_size,
-                                         DMA_TO_DEVICE);
-               if (unlikely(dma_mapping_error(dev->mt76.dev, phy_addr))) {
-                       __free_pages(page, get_order(length));
-                       mt76_put_rxwi(&dev->mt76, t);
-                       goto unmap;
-               }
+               addr = page_pool_get_dma_addr(virt_to_head_page(buf)) + offset;
+               dir = page_pool_get_dma_dir(q->page_pool);
+               dma_sync_single_for_device(dev->mt76.dma_dev, addr, len, dir);
 
-               desc->buf0 = cpu_to_le32(phy_addr);
-               token = mt76_rx_token_consume(&dev->mt76, ptr, t, phy_addr);
+               desc->buf0 = cpu_to_le32(addr);
+               token = mt76_rx_token_consume(&dev->mt76, buf, t, addr);
                if (token < 0) {
-                       dma_unmap_single(dev->mt76.dma_dev, phy_addr,
-                                        wed->wlan.rx_size, DMA_TO_DEVICE);
-                       __free_pages(page, get_order(length));
-                       mt76_put_rxwi(&dev->mt76, t);
+                       mt76_put_page_pool_buf(buf, false);
                        goto unmap;
                }
 
@@ -672,6 +657,8 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
        return 0;
 
 unmap:
+       if (t)
+               mt76_put_rxwi(&dev->mt76, t);
        mt7915_mmio_wed_release_rx_buf(wed);
        return -ENOMEM;
 }
index 3e281715fcd4de2a97fad48bf27e891e8b8d9612..b88959ef38aa6c0f9c4e6b5d9a8e5d7d1fbdd8ec 100644 (file)
@@ -319,29 +319,27 @@ mt76u_set_endpoints(struct usb_interface *intf,
 
 static int
 mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
-                int nsgs, gfp_t gfp)
+                int nsgs)
 {
        int i;
 
        for (i = 0; i < nsgs; i++) {
-               struct page *page;
                void *data;
                int offset;
 
-               data = page_frag_alloc(&q->rx_page, q->buf_size, gfp);
+               data = mt76_get_page_pool_buf(q, &offset, q->buf_size);
                if (!data)
                        break;
 
-               page = virt_to_head_page(data);
-               offset = data - page_address(page);
-               sg_set_page(&urb->sg[i], page, q->buf_size, offset);
+               sg_set_page(&urb->sg[i], virt_to_head_page(data), q->buf_size,
+                           offset);
        }
 
        if (i < nsgs) {
                int j;
 
                for (j = nsgs; j < urb->num_sgs; j++)
-                       skb_free_frag(sg_virt(&urb->sg[j]));
+                       mt76_put_page_pool_buf(sg_virt(&urb->sg[j]), false);
                urb->num_sgs = i;
        }
 
@@ -354,15 +352,16 @@ mt76u_fill_rx_sg(struct mt76_dev *dev, struct mt76_queue *q, struct urb *urb,
 
 static int
 mt76u_refill_rx(struct mt76_dev *dev, struct mt76_queue *q,
-               struct urb *urb, int nsgs, gfp_t gfp)
+               struct urb *urb, int nsgs)
 {
        enum mt76_rxq_id qid = q - &dev->q_rx[MT_RXQ_MAIN];
+       int offset;
 
        if (qid == MT_RXQ_MAIN && dev->usb.sg_en)
-               return mt76u_fill_rx_sg(dev, q, urb, nsgs, gfp);
+               return mt76u_fill_rx_sg(dev, q, urb, nsgs);
 
        urb->transfer_buffer_length = q->buf_size;
-       urb->transfer_buffer = page_frag_alloc(&q->rx_page, q->buf_size, gfp);
+       urb->transfer_buffer = mt76_get_page_pool_buf(q, &offset, q->buf_size);
 
        return urb->transfer_buffer ? 0 : -ENOMEM;
 }
@@ -400,7 +399,7 @@ mt76u_rx_urb_alloc(struct mt76_dev *dev, struct mt76_queue *q,
        if (err)
                return err;
 
-       return mt76u_refill_rx(dev, q, e->urb, sg_size, GFP_KERNEL);
+       return mt76u_refill_rx(dev, q, e->urb, sg_size);
 }
 
 static void mt76u_urb_free(struct urb *urb)
@@ -408,10 +407,10 @@ static void mt76u_urb_free(struct urb *urb)
        int i;
 
        for (i = 0; i < urb->num_sgs; i++)
-               skb_free_frag(sg_virt(&urb->sg[i]));
+               mt76_put_page_pool_buf(sg_virt(&urb->sg[i]), false);
 
        if (urb->transfer_buffer)
-               skb_free_frag(urb->transfer_buffer);
+               mt76_put_page_pool_buf(urb->transfer_buffer, false);
 
        usb_free_urb(urb);
 }
@@ -547,6 +546,8 @@ mt76u_process_rx_entry(struct mt76_dev *dev, struct urb *urb,
                len -= data_len;
                nsgs++;
        }
+
+       skb_mark_for_recycle(skb);
        dev->drv->rx_skb(dev, MT_RXQ_MAIN, skb, NULL);
 
        return nsgs;
@@ -612,7 +613,7 @@ mt76u_process_rx_queue(struct mt76_dev *dev, struct mt76_queue *q)
 
                count = mt76u_process_rx_entry(dev, urb, q->buf_size);
                if (count > 0) {
-                       err = mt76u_refill_rx(dev, q, urb, count, GFP_ATOMIC);
+                       err = mt76u_refill_rx(dev, q, urb, count);
                        if (err < 0)
                                break;
                }
@@ -663,6 +664,10 @@ mt76u_alloc_rx_queue(struct mt76_dev *dev, enum mt76_rxq_id qid)
        struct mt76_queue *q = &dev->q_rx[qid];
        int i, err;
 
+       err = mt76_create_page_pool(dev, q);
+       if (err)
+               return err;
+
        spin_lock_init(&q->lock);
        q->entry = devm_kcalloc(dev->dev,
                                MT_NUM_RX_ENTRIES, sizeof(*q->entry),
@@ -691,7 +696,6 @@ EXPORT_SYMBOL_GPL(mt76u_alloc_mcu_queue);
 static void
 mt76u_free_rx_queue(struct mt76_dev *dev, struct mt76_queue *q)
 {
-       struct page *page;
        int i;
 
        for (i = 0; i < q->ndesc; i++) {
@@ -701,13 +705,7 @@ mt76u_free_rx_queue(struct mt76_dev *dev, struct mt76_queue *q)
                mt76u_urb_free(q->entry[i].urb);
                q->entry[i].urb = NULL;
        }
-
-       if (!q->rx_page.va)
-               return;
-
-       page = virt_to_page(q->rx_page.va);
-       __page_frag_cache_drain(page, q->rx_page.pagecnt_bias);
-       memset(&q->rx_page, 0, sizeof(q->rx_page));
+       page_pool_destroy(q->page_pool);
 }
 
 static void mt76u_free_rx(struct mt76_dev *dev)