Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[linux-2.6-block.git] drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 152cf9ad9566587320a4a8d5dd34140b3d6b24b9..07973ef826c15e7c8136b17ed0a09fe81697594c 100644 (file)
@@ -2,6 +2,7 @@
  *
  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
  *
  * Portions of this file are derived from the ipw3945 project, as well
  * as portions of the ieee80211 subsystem header files.
  */
 static int iwl_rxq_space(const struct iwl_rxq *rxq)
 {
-       /* Make sure RX_QUEUE_SIZE is a power of 2 */
-       BUILD_BUG_ON(RX_QUEUE_SIZE & (RX_QUEUE_SIZE - 1));
+       /* Make sure rx queue size is a power of 2 */
+       WARN_ON(rxq->queue_size & (rxq->queue_size - 1));
 
        /*
         * There can be up to (RX_QUEUE_SIZE - 1) free slots, to avoid ambiguity
@@ -149,7 +150,7 @@ static int iwl_rxq_space(const struct iwl_rxq *rxq)
         * The following is equivalent to modulo by RX_QUEUE_SIZE and is well
         * defined for negative dividends.
         */
-       return (rxq->read - rxq->write - 1) & (RX_QUEUE_SIZE - 1);
+       return (rxq->read - rxq->write - 1) & (rxq->queue_size - 1);
 }
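
The mask-based space computation above only works because the queue size is a power of two; the new WARN_ON makes that assumption explicit at runtime instead of compile time, since the size is now per-queue data rather than a constant. A minimal standalone sketch (hypothetical values, not driver code) of why the AND is a safe modulo even when read - write - 1 wraps negative:

    #include <assert.h>

    /* ring_space() mirrors the arithmetic in iwl_rxq_space(); queue_size
     * must be a power of two (512 matches the MQ table size configured
     * elsewhere in this patch).
     */
    static unsigned int ring_space(unsigned int read, unsigned int write,
                                   unsigned int queue_size)
    {
            /* unsigned wrap-around plus the power-of-two mask behaves
             * like a modulo that is well defined for negative dividends
             */
            return (read - write - 1) & (queue_size - 1);
    }

    int main(void)
    {
            assert(ring_space(0, 0, 512) == 511); /* empty: size - 1 slots */
            assert(ring_space(5, 4, 512) == 0);   /* write just behind read */
            assert(ring_space(0, 511, 512) == 0); /* same, wrapped */
            return 0;
    }
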
 
 /*
@@ -160,6 +161,12 @@ static inline __le32 iwl_pcie_dma_addr2rbd_ptr(dma_addr_t dma_addr)
        return cpu_to_le32((u32)(dma_addr >> 8));
 }
 
+static void iwl_pcie_write_prph_64(struct iwl_trans *trans, u64 ofs, u64 val)
+{
+       iwl_write_prph(trans, ofs, val & 0xffffffff);
+       iwl_write_prph(trans, ofs + 4, val >> 32);
+}
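
iwl_pcie_write_prph_64() is used later in this patch to program the 64-bit RBD base addresses; a tiny standalone sketch (made-up offset and value) of the low/high word split it performs:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the two 32-bit peripheral writes: low word at ofs,
     * high word at ofs + 4.
     */
    static void write_prph_64(uint64_t ofs, uint64_t val)
    {
            printf("prph[0x%04llx] = 0x%08llx\n", (unsigned long long)ofs,
                   (unsigned long long)(val & 0xffffffff));
            printf("prph[0x%04llx] = 0x%08llx\n", (unsigned long long)(ofs + 4),
                   (unsigned long long)(val >> 32));
    }

    int main(void)
    {
            /* prints 0x55667788 at 0x1234 and 0x11223344 at 0x1238 */
            write_prph_64(0x1234, 0x1122334455667788ULL);
            return 0;
    }
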
+
 /*
  * iwl_pcie_rx_stop - stops the Rx DMA
  */
@@ -173,10 +180,9 @@ int iwl_pcie_rx_stop(struct iwl_trans *trans)
 /*
  * iwl_pcie_rxq_inc_wr_ptr - Update the write pointer for the RX queue
  */
-static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans)
+static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans,
+                                   struct iwl_rxq *rxq)
 {
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rxq *rxq = &trans_pcie->rxq;
        u32 reg;
 
        lockdep_assert_held(&rxq->lock);
@@ -201,24 +207,73 @@ static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans)
        }
 
        rxq->write_actual = round_down(rxq->write, 8);
-       iwl_write32(trans, FH_RSCSR_CHNL0_WPTR, rxq->write_actual);
+       if (trans->cfg->mq_rx_supported)
+               iwl_write_prph(trans, RFH_Q_FRBDCB_WIDX(rxq->id),
+                              rxq->write_actual);
+       else
+               iwl_write32(trans, FH_RSCSR_CHNL0_WPTR, rxq->write_actual);
 }
 
 static void iwl_pcie_rxq_check_wrptr(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rxq *rxq = &trans_pcie->rxq;
+       int i;
 
-       spin_lock(&rxq->lock);
+       for (i = 0; i < trans->num_rx_queues; i++) {
+               struct iwl_rxq *rxq = &trans_pcie->rxq[i];
+
+               if (!rxq->need_update)
+                       continue;
+               spin_lock(&rxq->lock);
+               iwl_pcie_rxq_inc_wr_ptr(trans, rxq);
+               rxq->need_update = false;
+               spin_unlock(&rxq->lock);
+       }
+}
+
+static void iwl_pcie_rxq_mq_restock(struct iwl_trans *trans,
+                                   struct iwl_rxq *rxq)
+{
+       struct iwl_rx_mem_buffer *rxb;
+
+       /*
+        * If the device isn't enabled - no need to try to add buffers...
+        * This can happen when we stop the device and still have an interrupt
+        * pending. We stop the APM before we sync the interrupts because we
+        * have to (see comment there). On the other hand, since the APM is
+        * stopped, we cannot access the HW (in particular not prph).
+        * So don't try to restock if the APM has been already stopped.
+        */
+       if (!test_bit(STATUS_DEVICE_ENABLED, &trans->status))
+               return;
 
-       if (!rxq->need_update)
-               goto exit_unlock;
+       spin_lock(&rxq->lock);
+       while (rxq->free_count) {
+               __le64 *bd = (__le64 *)rxq->bd;
 
-       iwl_pcie_rxq_inc_wr_ptr(trans);
-       rxq->need_update = false;
+               /* Get next free Rx buffer, remove from free list */
+               rxb = list_first_entry(&rxq->rx_free, struct iwl_rx_mem_buffer,
+                                      list);
+               list_del(&rxb->list);
 
- exit_unlock:
+               /* The first 12 bits are expected to be empty */
+               WARN_ON(rxb->page_dma & DMA_BIT_MASK(12));
+               /* Point to Rx buffer via next RBD in circular buffer */
+               bd[rxq->write] = cpu_to_le64(rxb->page_dma | rxb->vid);
+               rxq->write = (rxq->write + 1) & MQ_RX_TABLE_MASK;
+               rxq->free_count--;
+       }
        spin_unlock(&rxq->lock);
+
+       /*
+        * If we've added more space for the firmware to place data, tell it.
+        * Increment device's write pointer in multiples of 8.
+        */
+       if (rxq->write_actual != (rxq->write & ~0x7)) {
+               spin_lock(&rxq->lock);
+               iwl_pcie_rxq_inc_wr_ptr(trans, rxq);
+               spin_unlock(&rxq->lock);
+       }
 }
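
The packing into bd[rxq->write] works because the receive pages are DMA-mapped with at least 4 KiB alignment, so the low 12 bits of page_dma are free to carry the buffer's vid; the WARN_ON above checks exactly that. A standalone sketch of the pack/unpack round trip (made-up address and vid):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t page_dma = 0x1f000;  /* 4 KiB aligned: low 12 bits clear */
            uint16_t vid = 0x2a;          /* index into the global rxb table */
            uint64_t bd = page_dma | vid; /* as written into the free BD ring */

            /* the device later hands the vid back via the used BD ring */
            assert((bd & 0xfff) == vid);
            assert((bd & ~0xfffULL) == page_dma);
            return 0;
    }
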
 
 /*
@@ -232,10 +287,8 @@ static void iwl_pcie_rxq_check_wrptr(struct iwl_trans *trans)
  * also updates the memory address in the firmware to reference the new
  * target buffer.
  */
-static void iwl_pcie_rxq_restock(struct iwl_trans *trans)
+static void iwl_pcie_rxq_restock(struct iwl_trans *trans, struct iwl_rxq *rxq)
 {
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rxq *rxq = &trans_pcie->rxq;
        struct iwl_rx_mem_buffer *rxb;
 
        /*
@@ -251,6 +304,7 @@ static void iwl_pcie_rxq_restock(struct iwl_trans *trans)
 
        spin_lock(&rxq->lock);
        while ((iwl_rxq_space(rxq) > 0) && (rxq->free_count)) {
+               __le32 *bd = (__le32 *)rxq->bd;
                /* The overwritten rxb must be a used one */
                rxb = rxq->queue[rxq->write];
                BUG_ON(rxb && rxb->page);
@@ -261,7 +315,7 @@ static void iwl_pcie_rxq_restock(struct iwl_trans *trans)
                list_del(&rxb->list);
 
                /* Point to Rx buffer via next RBD in circular buffer */
-               rxq->bd[rxq->write] = iwl_pcie_dma_addr2rbd_ptr(rxb->page_dma);
+               bd[rxq->write] = iwl_pcie_dma_addr2rbd_ptr(rxb->page_dma);
                rxq->queue[rxq->write] = rxb;
                rxq->write = (rxq->write + 1) & RX_QUEUE_MASK;
                rxq->free_count--;
@@ -272,7 +326,7 @@ static void iwl_pcie_rxq_restock(struct iwl_trans *trans)
         * Increment device's write pointer in multiples of 8. */
        if (rxq->write_actual != (rxq->write & ~0x7)) {
                spin_lock(&rxq->lock);
-               iwl_pcie_rxq_inc_wr_ptr(trans);
+               iwl_pcie_rxq_inc_wr_ptr(trans, rxq);
                spin_unlock(&rxq->lock);
        }
 }
@@ -285,13 +339,9 @@ static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans,
                                           gfp_t priority)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rxq *rxq = &trans_pcie->rxq;
        struct page *page;
        gfp_t gfp_mask = priority;
 
-       if (rxq->free_count > RX_LOW_WATERMARK)
-               gfp_mask |= __GFP_NOWARN;
-
        if (trans_pcie->rx_page_order > 0)
                gfp_mask |= __GFP_COMP;
 
@@ -301,16 +351,13 @@ static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans,
                if (net_ratelimit())
                        IWL_DEBUG_INFO(trans, "alloc_pages failed, order: %d\n",
                                       trans_pcie->rx_page_order);
-               /* Issue an error if the hardware has consumed more than half
-                * of its free buffer list and we don't have enough
-                * pre-allocated buffers.
+               /*
+                * Issue an error if we don't have enough pre-allocated
+                * buffers.
+                */
-               if (rxq->free_count <= RX_LOW_WATERMARK &&
-                   iwl_rxq_space(rxq) > (RX_QUEUE_SIZE / 2) &&
-                   net_ratelimit())
+               if (!(gfp_mask & __GFP_NOWARN) && net_ratelimit())
                        IWL_CRIT(trans,
-                                "Failed to alloc_pages with GFP_KERNEL. Only %u free buffers remaining.\n",
-                                rxq->free_count);
+                                "Failed to alloc_pages\n");
                return NULL;
        }
        return page;
@@ -325,10 +372,10 @@ static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans,
  * iwl_pcie_rxq_restock. The latter function will update the HW to use the newly
  * allocated buffers.
  */
-static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority)
+static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority,
+                                  struct iwl_rxq *rxq)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rxq *rxq = &trans_pcie->rxq;
        struct iwl_rx_mem_buffer *rxb;
        struct page *page;
 
@@ -372,10 +419,6 @@ static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority)
                        __free_pages(page, trans_pcie->rx_page_order);
                        return;
                }
-               /* dma address must be no more than 36 bits */
-               BUG_ON(rxb->page_dma & ~DMA_BIT_MASK(36));
-               /* and also 256 byte aligned! */
-               BUG_ON(rxb->page_dma & DMA_BIT_MASK(8));
 
                spin_lock(&rxq->lock);
 
@@ -386,40 +429,23 @@ static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority)
        }
 }
 
-static void iwl_pcie_rxq_free_rbs(struct iwl_trans *trans)
+static void iwl_pcie_free_rbs_pool(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rxq *rxq = &trans_pcie->rxq;
        int i;
 
-       lockdep_assert_held(&rxq->lock);
-
-       for (i = 0; i < RX_QUEUE_SIZE; i++) {
-               if (!rxq->pool[i].page)
+       for (i = 0; i < MQ_RX_POOL_SIZE; i++) {
+               if (!trans_pcie->rx_pool[i].page)
                        continue;
-               dma_unmap_page(trans->dev, rxq->pool[i].page_dma,
+               dma_unmap_page(trans->dev, trans_pcie->rx_pool[i].page_dma,
                               PAGE_SIZE << trans_pcie->rx_page_order,
                               DMA_FROM_DEVICE);
-               __free_pages(rxq->pool[i].page, trans_pcie->rx_page_order);
-               rxq->pool[i].page = NULL;
+               __free_pages(trans_pcie->rx_pool[i].page,
+                            trans_pcie->rx_page_order);
+               trans_pcie->rx_pool[i].page = NULL;
        }
 }
 
-/*
- * iwl_pcie_rx_replenish - Move all used buffers from rx_used to rx_free
- *
- * When moving to rx_free an page is allocated for the slot.
- *
- * Also restock the Rx queue via iwl_pcie_rxq_restock.
- * This is called only during initialization
- */
-static void iwl_pcie_rx_replenish(struct iwl_trans *trans)
-{
-       iwl_pcie_rxq_alloc_rbs(trans, GFP_KERNEL);
-
-       iwl_pcie_rxq_restock(trans);
-}
-
 /*
  * iwl_pcie_rx_allocator - Allocates pages in the background for RX queues
  *
@@ -444,6 +470,11 @@ static void iwl_pcie_rx_allocator(struct iwl_trans *trans)
        while (pending) {
                int i;
                struct list_head local_allocated;
+               gfp_t gfp_mask = GFP_KERNEL;
+
+               /* Do not post a warning if there are only a few requests */
+               if (pending < RX_PENDING_WATERMARK)
+                       gfp_mask |= __GFP_NOWARN;
 
                INIT_LIST_HEAD(&local_allocated);
 
@@ -463,7 +494,7 @@ static void iwl_pcie_rx_allocator(struct iwl_trans *trans)
                        BUG_ON(rxb->page);
 
                        /* Alloc a new receive buffer */
-                       page = iwl_pcie_rx_alloc_page(trans, GFP_KERNEL);
+                       page = iwl_pcie_rx_alloc_page(trans, gfp_mask);
                        if (!page)
                                continue;
                        rxb->page = page;
@@ -477,10 +508,6 @@ static void iwl_pcie_rx_allocator(struct iwl_trans *trans)
                                __free_pages(page, trans_pcie->rx_page_order);
                                continue;
                        }
-                       /* dma address must be no more than 36 bits */
-                       BUG_ON(rxb->page_dma & ~DMA_BIT_MASK(36));
-                       /* and also 256 byte aligned! */
-                       BUG_ON(rxb->page_dma & DMA_BIT_MASK(8));
 
                        /* move the allocated entry to the out list */
                        list_move(&rxb->list, &local_allocated);
@@ -561,38 +588,83 @@ static void iwl_pcie_rx_allocator_work(struct work_struct *data)
 static int iwl_pcie_rx_alloc(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rxq *rxq = &trans_pcie->rxq;
        struct iwl_rb_allocator *rba = &trans_pcie->rba;
        struct device *dev = trans->dev;
+       int i;
+       int free_size = trans->cfg->mq_rx_supported ? sizeof(__le64) :
+                                                     sizeof(__le32);
+
+       if (WARN_ON(trans_pcie->rxq))
+               return -EINVAL;
 
-       memset(&trans_pcie->rxq, 0, sizeof(trans_pcie->rxq));
+       trans_pcie->rxq = kcalloc(trans->num_rx_queues, sizeof(struct iwl_rxq),
+                                 GFP_KERNEL);
+       if (!trans_pcie->rxq)
+               return -EINVAL;
 
-       spin_lock_init(&rxq->lock);
        spin_lock_init(&rba->lock);
 
-       if (WARN_ON(rxq->bd || rxq->rb_stts))
-               return -EINVAL;
+       for (i = 0; i < trans->num_rx_queues; i++) {
+               struct iwl_rxq *rxq = &trans_pcie->rxq[i];
 
-       /* Allocate the circular buffer of Read Buffer Descriptors (RBDs) */
-       rxq->bd = dma_zalloc_coherent(dev, sizeof(__le32) * RX_QUEUE_SIZE,
-                                     &rxq->bd_dma, GFP_KERNEL);
-       if (!rxq->bd)
-               goto err_bd;
+               spin_lock_init(&rxq->lock);
+               if (trans->cfg->mq_rx_supported)
+                       rxq->queue_size = MQ_RX_TABLE_SIZE;
+               else
+                       rxq->queue_size = RX_QUEUE_SIZE;
 
-       /*Allocate the driver's pointer to receive buffer status */
-       rxq->rb_stts = dma_zalloc_coherent(dev, sizeof(*rxq->rb_stts),
-                                          &rxq->rb_stts_dma, GFP_KERNEL);
-       if (!rxq->rb_stts)
-               goto err_rb_stts;
+               /*
+                * Allocate the circular buffer of Read Buffer Descriptors
+                * (RBDs)
+                */
+               rxq->bd = dma_zalloc_coherent(dev,
+                                            free_size * rxq->queue_size,
+                                            &rxq->bd_dma, GFP_KERNEL);
+               if (!rxq->bd)
+                       goto err;
+
+               if (trans->cfg->mq_rx_supported) {
+                       rxq->used_bd = dma_zalloc_coherent(dev,
+                                                          sizeof(__le32) *
+                                                          rxq->queue_size,
+                                                          &rxq->used_bd_dma,
+                                                          GFP_KERNEL);
+                       if (!rxq->used_bd)
+                               goto err;
+               }
 
+               /* Allocate the driver's pointer to receive buffer status */
+               rxq->rb_stts = dma_zalloc_coherent(dev, sizeof(*rxq->rb_stts),
+                                                  &rxq->rb_stts_dma,
+                                                  GFP_KERNEL);
+               if (!rxq->rb_stts)
+                       goto err;
+       }
        return 0;
 
-err_rb_stts:
-       dma_free_coherent(dev, sizeof(__le32) * RX_QUEUE_SIZE,
-                         rxq->bd, rxq->bd_dma);
-       rxq->bd_dma = 0;
-       rxq->bd = NULL;
-err_bd:
+err:
+       for (i = 0; i < trans->num_rx_queues; i++) {
+               struct iwl_rxq *rxq = &trans_pcie->rxq[i];
+
+               if (rxq->bd)
+                       dma_free_coherent(dev, free_size * rxq->queue_size,
+                                         rxq->bd, rxq->bd_dma);
+               rxq->bd_dma = 0;
+               rxq->bd = NULL;
+
+               if (rxq->rb_stts)
+                       dma_free_coherent(trans->dev,
+                                         sizeof(struct iwl_rb_status),
+                                         rxq->rb_stts, rxq->rb_stts_dma);
+
+               if (rxq->used_bd)
+                       dma_free_coherent(dev, sizeof(__le32) * rxq->queue_size,
+                                         rxq->used_bd, rxq->used_bd_dma);
+               rxq->used_bd_dma = 0;
+               rxq->used_bd = NULL;
+       }
+       kfree(trans_pcie->rxq);
+
        return -ENOMEM;
 }
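
For a sense of scale of the coherent allocations above, here is a back-of-envelope sizing sketch; MQ_RX_TABLE_SIZE == 512 is an assumption consistent with the "512 RBDs" RFH configuration later in this patch, and RX_QUEUE_SIZE == 256 is likewise assumed for the legacy path:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int mq_table = 512;  /* assumed MQ_RX_TABLE_SIZE */
            unsigned int legacy = 256;    /* assumed RX_QUEUE_SIZE */

            printf("MQ free BD table:  %zu bytes per queue\n",
                   mq_table * sizeof(uint64_t));     /* 4096 */
            printf("MQ used BD table:  %zu bytes per queue\n",
                   mq_table * sizeof(uint32_t));     /* 2048 */
            printf("legacy BD table:   %zu bytes\n",
                   legacy * sizeof(uint32_t));       /* 1024 */
            return 0;
    }
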
 
@@ -659,65 +731,103 @@ static void iwl_pcie_rx_hw_init(struct iwl_trans *trans, struct iwl_rxq *rxq)
                iwl_set_bit(trans, CSR_INT_COALESCING, IWL_HOST_INT_OPER_MODE);
 }
 
-static void iwl_pcie_rx_init_rxb_lists(struct iwl_rxq *rxq)
+static void iwl_pcie_rx_mq_hw_init(struct iwl_trans *trans)
 {
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       u32 rb_size, enabled = 0;
        int i;
 
-       lockdep_assert_held(&rxq->lock);
-
-       INIT_LIST_HEAD(&rxq->rx_free);
-       INIT_LIST_HEAD(&rxq->rx_used);
-       rxq->free_count = 0;
-       rxq->used_count = 0;
+       switch (trans_pcie->rx_buf_size) {
+       case IWL_AMSDU_4K:
+               rb_size = RFH_RXF_DMA_RB_SIZE_4K;
+               break;
+       case IWL_AMSDU_8K:
+               rb_size = RFH_RXF_DMA_RB_SIZE_8K;
+               break;
+       case IWL_AMSDU_12K:
+               rb_size = RFH_RXF_DMA_RB_SIZE_12K;
+               break;
+       default:
+               WARN_ON(1);
+               rb_size = RFH_RXF_DMA_RB_SIZE_4K;
+       }
 
-       for (i = 0; i < RX_QUEUE_SIZE; i++)
-               list_add(&rxq->pool[i].list, &rxq->rx_used);
-}
+       /* Stop Rx DMA */
+       iwl_write_prph(trans, RFH_RXF_DMA_CFG, 0);
+       /* disable free and used rx queue operation */
+       iwl_write_prph(trans, RFH_RXF_RXQ_ACTIVE, 0);
+
+       for (i = 0; i < trans->num_rx_queues; i++) {
+               /* Tell device where to find RBD free table in DRAM */
+               iwl_pcie_write_prph_64(trans, RFH_Q_FRBDCB_BA_LSB(i),
+                                      (u64)(trans_pcie->rxq[i].bd_dma));
+               /* Tell device where to find RBD used table in DRAM */
+               iwl_pcie_write_prph_64(trans, RFH_Q_URBDCB_BA_LSB(i),
+                                      (u64)(trans_pcie->rxq[i].used_bd_dma));
+               /* Tell device where in DRAM to update its Rx status */
+               iwl_pcie_write_prph_64(trans, RFH_Q_URBD_STTS_WPTR_LSB(i),
+                                      trans_pcie->rxq[i].rb_stts_dma);
+               /* Reset device index tables */
+               iwl_write_prph(trans, RFH_Q_FRBDCB_WIDX(i), 0);
+               iwl_write_prph(trans, RFH_Q_FRBDCB_RIDX(i), 0);
+               iwl_write_prph(trans, RFH_Q_URBDCB_WIDX(i), 0);
+
+               enabled |= BIT(i) | BIT(i + 16);
+       }
 
-static void iwl_pcie_rx_init_rba(struct iwl_rb_allocator *rba)
-{
-       int i;
+       /* restock default queue */
+       iwl_pcie_rxq_mq_restock(trans, &trans_pcie->rxq[0]);
 
-       lockdep_assert_held(&rba->lock);
+       /*
+        * Enable Rx DMA
+        * Single frame mode
+        * Rx buffer size 4k, 8k or 12k
+        * Min RB size 4 or 8
+        * 512 RBDs
+        */
+       iwl_write_prph(trans, RFH_RXF_DMA_CFG,
+                      RFH_DMA_EN_ENABLE_VAL |
+                      rb_size | RFH_RXF_DMA_SINGLE_FRAME_MASK |
+                      RFH_RXF_DMA_MIN_RB_4_8 |
+                      RFH_RXF_DMA_RBDCB_SIZE_512);
 
-       INIT_LIST_HEAD(&rba->rbd_allocated);
-       INIT_LIST_HEAD(&rba->rbd_empty);
+       iwl_write_prph(trans, RFH_GEN_CFG, RFH_GEN_CFG_RFH_DMA_SNOOP |
+                                         RFH_GEN_CFG_SERVICE_DMA_SNOOP);
+       iwl_write_prph(trans, RFH_RXF_RXQ_ACTIVE, enabled);
 
-       for (i = 0; i < RX_POOL_SIZE; i++)
-               list_add(&rba->pool[i].list, &rba->rbd_empty);
+       /* Set interrupt coalescing timer to default (2048 usecs) */
+       iwl_write8(trans, CSR_INT_COALESCING, IWL_HOST_INT_TIMEOUT_DEF);
 }
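
The loop above sets two bits per queue in the RFH_RXF_RXQ_ACTIVE mask, one in each 16-bit half of the register; a standalone sketch of the resulting value for a hypothetical four-queue configuration:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t enabled = 0;
            int i, num_rx_queues = 4;     /* made-up queue count */

            for (i = 0; i < num_rx_queues; i++)
                    enabled |= (1u << i) | (1u << (i + 16));

            assert(enabled == 0x000f000f); /* queues 0-3 set in both halves */
            return 0;
    }
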
 
-static void iwl_pcie_rx_free_rba(struct iwl_trans *trans)
+static void iwl_pcie_rx_init_rxb_lists(struct iwl_rxq *rxq)
 {
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rb_allocator *rba = &trans_pcie->rba;
-       int i;
+       lockdep_assert_held(&rxq->lock);
 
-       lockdep_assert_held(&rba->lock);
+       INIT_LIST_HEAD(&rxq->rx_free);
+       INIT_LIST_HEAD(&rxq->rx_used);
+       rxq->free_count = 0;
+       rxq->used_count = 0;
+}
 
-       for (i = 0; i < RX_POOL_SIZE; i++) {
-               if (!rba->pool[i].page)
-                       continue;
-               dma_unmap_page(trans->dev, rba->pool[i].page_dma,
-                              PAGE_SIZE << trans_pcie->rx_page_order,
-                              DMA_FROM_DEVICE);
-               __free_pages(rba->pool[i].page, trans_pcie->rx_page_order);
-               rba->pool[i].page = NULL;
-       }
+static int iwl_pcie_dummy_napi_poll(struct napi_struct *napi, int budget)
+{
+       WARN_ON(1);
+       return 0;
 }
 
 int iwl_pcie_rx_init(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rxq *rxq = &trans_pcie->rxq;
+       struct iwl_rxq *def_rxq;
        struct iwl_rb_allocator *rba = &trans_pcie->rba;
-       int i, err;
+       int i, err, num_rbds, allocator_pool_size;
 
-       if (!rxq->bd) {
+       if (!trans_pcie->rxq) {
                err = iwl_pcie_rx_alloc(trans);
                if (err)
                        return err;
        }
+       def_rxq = trans_pcie->rxq;
        if (!rba->alloc_wq)
                rba->alloc_wq = alloc_workqueue("rb_allocator",
                                                WQ_HIGHPRI | WQ_UNBOUND, 1);
@@ -726,34 +836,68 @@ int iwl_pcie_rx_init(struct iwl_trans *trans)
        spin_lock(&rba->lock);
        atomic_set(&rba->req_pending, 0);
        atomic_set(&rba->req_ready, 0);
-       /* free all first - we might be reconfigured for a different size */
-       iwl_pcie_rx_free_rba(trans);
-       iwl_pcie_rx_init_rba(rba);
+       INIT_LIST_HEAD(&rba->rbd_allocated);
+       INIT_LIST_HEAD(&rba->rbd_empty);
        spin_unlock(&rba->lock);
 
-       spin_lock(&rxq->lock);
-
        /* free all first - we might be reconfigured for a different size */
-       iwl_pcie_rxq_free_rbs(trans);
-       iwl_pcie_rx_init_rxb_lists(rxq);
+       iwl_pcie_free_rbs_pool(trans);
 
        for (i = 0; i < RX_QUEUE_SIZE; i++)
-               rxq->queue[i] = NULL;
+               def_rxq->queue[i] = NULL;
 
-       /* Set us so that we have processed and used all buffers, but have
-        * not restocked the Rx queue with fresh buffers */
-       rxq->read = rxq->write = 0;
-       rxq->write_actual = 0;
-       memset(rxq->rb_stts, 0, sizeof(*rxq->rb_stts));
-       spin_unlock(&rxq->lock);
+       for (i = 0; i < trans->num_rx_queues; i++) {
+               struct iwl_rxq *rxq = &trans_pcie->rxq[i];
 
-       iwl_pcie_rx_replenish(trans);
+               rxq->id = i;
 
-       iwl_pcie_rx_hw_init(trans, rxq);
+               spin_lock(&rxq->lock);
+               /*
+                * Set the read and write pointers to reflect that we have
+                * processed and used all buffers, but have not restocked the
+                * Rx queue with fresh buffers
+                */
+               rxq->read = 0;
+               rxq->write = 0;
+               rxq->write_actual = 0;
+               memset(rxq->rb_stts, 0, sizeof(*rxq->rb_stts));
 
-       spin_lock(&rxq->lock);
-       iwl_pcie_rxq_inc_wr_ptr(trans);
-       spin_unlock(&rxq->lock);
+               iwl_pcie_rx_init_rxb_lists(rxq);
+
+               if (!rxq->napi.poll)
+                       netif_napi_add(&trans_pcie->napi_dev, &rxq->napi,
+                                      iwl_pcie_dummy_napi_poll, 64);
+
+               spin_unlock(&rxq->lock);
+       }
+
+       /* move the pool to the default queue and allocator ownership */
+       num_rbds = trans->cfg->mq_rx_supported ?
+                    MQ_RX_POOL_SIZE : RX_QUEUE_SIZE;
+       allocator_pool_size = trans->num_rx_queues *
+               (RX_CLAIM_REQ_ALLOC - RX_POST_REQ_ALLOC);
+       for (i = 0; i < num_rbds; i++) {
+               struct iwl_rx_mem_buffer *rxb = &trans_pcie->rx_pool[i];
+
+               if (i < allocator_pool_size)
+                       list_add(&rxb->list, &rba->rbd_empty);
+               else
+                       list_add(&rxb->list, &def_rxq->rx_used);
+               trans_pcie->global_table[i] = rxb;
+               rxb->vid = (u16)i;
+       }
+
+       iwl_pcie_rxq_alloc_rbs(trans, GFP_KERNEL, def_rxq);
+       if (trans->cfg->mq_rx_supported) {
+               iwl_pcie_rx_mq_hw_init(trans);
+       } else {
+               iwl_pcie_rxq_restock(trans, def_rxq);
+               iwl_pcie_rx_hw_init(trans, def_rxq);
+       }
+
+       spin_lock(&def_rxq->lock);
+       iwl_pcie_rxq_inc_wr_ptr(trans, def_rxq);
+       spin_unlock(&def_rxq->lock);
 
        return 0;
 }
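
The pool setup above gives every buffer a vid equal to its index in rx_pool and records it in global_table, so a vid coming back from the device maps straight to its iwl_rx_mem_buffer regardless of which queue it was posted on. A standalone sketch of that round trip (POOL_SIZE is a made-up stand-in for MQ_RX_POOL_SIZE):

    #include <assert.h>

    #define POOL_SIZE 64    /* stand-in for MQ_RX_POOL_SIZE */

    struct rxb { unsigned short vid; };

    int main(void)
    {
            static struct rxb pool[POOL_SIZE];
            static struct rxb *global_table[POOL_SIZE];
            int i;

            for (i = 0; i < POOL_SIZE; i++) {
                    pool[i].vid = (unsigned short)i;
                    global_table[i] = &pool[i];
            }

            /* device reports vid 42: O(1) lookup, no queue ownership needed */
            assert(global_table[42]->vid == 42);
            return 0;
    }
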
@@ -761,12 +905,16 @@ int iwl_pcie_rx_init(struct iwl_trans *trans)
 void iwl_pcie_rx_free(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rxq *rxq = &trans_pcie->rxq;
        struct iwl_rb_allocator *rba = &trans_pcie->rba;
+       int free_size = trans->cfg->mq_rx_supported ? sizeof(__le64) :
+                                             sizeof(__le32);
+       int i;
 
-       /*if rxq->bd is NULL, it means that nothing has been allocated,
-        * exit now */
-       if (!rxq->bd) {
+       /*
+        * if rxq is NULL, it means that nothing has been allocated,
+        * exit now
+        */
+       if (!trans_pcie->rxq) {
                IWL_DEBUG_INFO(trans, "Free NULL rx context\n");
                return;
        }
@@ -777,27 +925,37 @@ void iwl_pcie_rx_free(struct iwl_trans *trans)
                rba->alloc_wq = NULL;
        }
 
-       spin_lock(&rba->lock);
-       iwl_pcie_rx_free_rba(trans);
-       spin_unlock(&rba->lock);
-
-       spin_lock(&rxq->lock);
-       iwl_pcie_rxq_free_rbs(trans);
-       spin_unlock(&rxq->lock);
-
-       dma_free_coherent(trans->dev, sizeof(__le32) * RX_QUEUE_SIZE,
-                         rxq->bd, rxq->bd_dma);
-       rxq->bd_dma = 0;
-       rxq->bd = NULL;
-
-       if (rxq->rb_stts)
-               dma_free_coherent(trans->dev,
-                                 sizeof(struct iwl_rb_status),
-                                 rxq->rb_stts, rxq->rb_stts_dma);
-       else
-               IWL_DEBUG_INFO(trans, "Free rxq->rb_stts which is NULL\n");
-       rxq->rb_stts_dma = 0;
-       rxq->rb_stts = NULL;
+       iwl_pcie_free_rbs_pool(trans);
+
+       for (i = 0; i < trans->num_rx_queues; i++) {
+               struct iwl_rxq *rxq = &trans_pcie->rxq[i];
+
+               if (rxq->bd)
+                       dma_free_coherent(trans->dev,
+                                         free_size * rxq->queue_size,
+                                         rxq->bd, rxq->bd_dma);
+               rxq->bd_dma = 0;
+               rxq->bd = NULL;
+
+               if (rxq->rb_stts)
+                       dma_free_coherent(trans->dev,
+                                         sizeof(struct iwl_rb_status),
+                                         rxq->rb_stts, rxq->rb_stts_dma);
+               else
+                       IWL_DEBUG_INFO(trans,
+                                      "Free rxq->rb_stts which is NULL\n");
+
+               if (rxq->used_bd)
+                       dma_free_coherent(trans->dev,
+                                         sizeof(__le32) * rxq->queue_size,
+                                         rxq->used_bd, rxq->used_bd_dma);
+               rxq->used_bd_dma = 0;
+               rxq->used_bd = NULL;
+
+               if (rxq->napi.poll)
+                       netif_napi_del(&rxq->napi);
+       }
+       kfree(trans_pcie->rxq);
 }
 
 /*
@@ -841,11 +999,11 @@ static void iwl_pcie_rx_reuse_rbd(struct iwl_trans *trans,
 }
 
 static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
+                               struct iwl_rxq *rxq,
                                struct iwl_rx_mem_buffer *rxb,
                                bool emergency)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rxq *rxq = &trans_pcie->rxq;
        struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue];
        bool page_stolen = false;
        int max_len = PAGE_SIZE << trans_pcie->rx_page_order;
@@ -911,7 +1069,12 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
                index = SEQ_TO_INDEX(sequence);
                cmd_index = get_cmd_index(&txq->q, index);
 
-               iwl_op_mode_rx(trans->op_mode, &trans_pcie->napi, &rxcb);
+               if (rxq->id == 0)
+                       iwl_op_mode_rx(trans->op_mode, &rxq->napi,
+                                      &rxcb);
+               else
+                       iwl_op_mode_rx_rss(trans->op_mode, &rxq->napi,
+                                          &rxcb, rxq->id);
 
                if (reclaim) {
                        kzfree(txq->entries[cmd_index].free_buf);
@@ -975,7 +1138,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
 static void iwl_pcie_rx_handle(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_rxq *rxq = &trans_pcie->rxq;
+       struct iwl_rxq *rxq = &trans_pcie->rxq[0];
        u32 r, i, j, count = 0;
        bool emergency = false;
 
@@ -993,16 +1156,26 @@ restart:
        while (i != r) {
                struct iwl_rx_mem_buffer *rxb;
 
-               if (unlikely(rxq->used_count == RX_QUEUE_SIZE / 2))
+               if (unlikely(rxq->used_count == rxq->queue_size / 2))
                        emergency = true;
 
-               rxb = rxq->queue[i];
-               rxq->queue[i] = NULL;
+               if (trans->cfg->mq_rx_supported) {
+                       /*
+                        * used_bd is a 32-bit value, but only the low
+                        * 12 bits are used to retrieve the vid
+                        */
+                       u16 vid = (u16)le32_to_cpu(rxq->used_bd[i]);
+
+                       rxb = trans_pcie->global_table[vid];
+               } else {
+                       rxb = rxq->queue[i];
+                       rxq->queue[i] = NULL;
+               }
 
                IWL_DEBUG_RX(trans, "rxbuf: HW = %d, SW = %d\n", r, i);
-               iwl_pcie_rx_handle_rb(trans, rxb, emergency);
+               iwl_pcie_rx_handle_rb(trans, rxq, rxb, emergency);
 
-               i = (i + 1) & RX_QUEUE_MASK;
+               i = (i + 1) & (rxq->queue_size - 1);
 
                /* If we have RX_CLAIM_REQ_ALLOC released rx buffers -
                 * try to claim the pre-allocated buffers from the allocator */
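
In the MQ branch above, the driver casts the 32-bit used-BD entry down to a u16 to recover the vid, relying on the upper bits being zero; a defensive standalone variant that masks the 12 meaningful bits explicitly (values are made up):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t used_bd_entry = 0x2a; /* CPU-order value, post le32_to_cpu() */
            uint16_t vid = used_bd_entry & 0xfff; /* only 12 bits carry the vid */

            assert(vid == 0x2a);
            return 0;
    }
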
@@ -1040,10 +1213,10 @@ restart:
                        count++;
                        if (count == 8) {
                                count = 0;
-                               if (rxq->used_count < RX_QUEUE_SIZE / 3)
+                               if (rxq->used_count < rxq->queue_size / 3)
                                        emergency = false;
                                spin_unlock(&rxq->lock);
-                               iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC);
+                               iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC, rxq);
                                spin_lock(&rxq->lock);
                        }
                }
@@ -1055,7 +1228,10 @@ restart:
                if (rxq->free_count >=  RX_CLAIM_REQ_ALLOC) {
                        rxq->read = i;
                        spin_unlock(&rxq->lock);
-                       iwl_pcie_rxq_restock(trans);
+                       if (trans->cfg->mq_rx_supported)
+                               iwl_pcie_rxq_mq_restock(trans, rxq);
+                       else
+                               iwl_pcie_rxq_restock(trans, rxq);
                        goto restart;
                }
        }
@@ -1077,10 +1253,10 @@ restart:
         * will be restocked by the next call of iwl_pcie_rxq_restock.
         */
        if (unlikely(emergency && count))
-               iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC);
+               iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC, rxq);
 
-       if (trans_pcie->napi.poll)
-               napi_gro_flush(&trans_pcie->napi, false);
+       if (rxq->napi.poll)
+               napi_gro_flush(&rxq->napi, false);
 }
 
 /*