cxgb3: Replace LRO with GRO
drivers/net/cxgb3/sge.c
4d22de3e 1/*
a02d44a0 2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
4d22de3e 3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
4d22de3e 9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
4d22de3e 31 */
32#include <linux/skbuff.h>
33#include <linux/netdevice.h>
34#include <linux/etherdevice.h>
35#include <linux/if_vlan.h>
36#include <linux/ip.h>
37#include <linux/tcp.h>
38#include <linux/dma-mapping.h>
a109a5b9 39#include <net/arp.h>
40#include "common.h"
41#include "regs.h"
42#include "sge_defs.h"
43#include "t3_cpl.h"
44#include "firmware_exports.h"
45
46#define USE_GTS 0
47
48#define SGE_RX_SM_BUF_SIZE 1536
e0994eb1 49
4d22de3e 50#define SGE_RX_COPY_THRES 256
cf992af5 51#define SGE_RX_PULL_LEN 128
4d22de3e 52
e0994eb1 53/*
54 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
55 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
56 * directly.
e0994eb1 57 */
cf992af5 58#define FL0_PG_CHUNK_SIZE 2048
59#define FL0_PG_ORDER 0
60#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
61#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
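/*
 * Editor's illustrative note (a sketch, not driver code): the chunk size
 * must divide PAGE_SIZE << order evenly, so each free-list allocation is
 * carved into
 *
 *	chunks_per_alloc = (PAGE_SIZE << order) / chunk_size;
 *
 * e.g. with 4KB pages FL0 gets 4096 / 2048 = 2 chunks per order-0 page and
 * FL1 gets 8192 / 8192 = 1 chunk per order-1 allocation.
 */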
cf992af5 62
e0994eb1 63#define SGE_RX_DROP_THRES 16
64
65/*
66 * Period of the Tx buffer reclaim timer. This timer does not need to run
67 * frequently as Tx buffers are usually reclaimed by new Tx packets.
68 */
69#define TX_RECLAIM_PERIOD (HZ / 4)
70
71/* WR size in bytes */
72#define WR_LEN (WR_FLITS * 8)
73
74/*
75 * Types of Tx queues in each queue set. Order here matters, do not change.
76 */
77enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
78
79/* Values for sge_txq.flags */
80enum {
81 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
82 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
83};
84
85struct tx_desc {
fb8e4444 86 __be64 flit[TX_DESC_FLITS];
87};
88
89struct rx_desc {
90 __be32 addr_lo;
91 __be32 len_gen;
92 __be32 gen2;
93 __be32 addr_hi;
94};
95
96struct tx_sw_desc { /* SW state per Tx descriptor */
97 struct sk_buff *skb;
98 u8 eop; /* set if last descriptor for packet */
99 u8 addr_idx; /* buffer index of first SGL entry in descriptor */
100 u8 fragidx; /* first page fragment associated with descriptor */
101 s8 sflit; /* start flit of first SGL entry in descriptor */
102};
103
cf992af5 104struct rx_sw_desc { /* SW state per Rx descriptor */
105 union {
106 struct sk_buff *skb;
107 struct fl_pg_chunk pg_chunk;
108 };
109 DECLARE_PCI_UNMAP_ADDR(dma_addr);
110};
111
112struct rsp_desc { /* response queue descriptor */
113 struct rss_header rss_hdr;
114 __be32 flags;
115 __be32 len_cq;
116 u8 imm_data[47];
117 u8 intr_gen;
118};
119
120/*
121 * Holds unmapping information for Tx packets that need deferred unmapping.
122 * This structure lives at skb->head and must be allocated by callers.
123 */
124struct deferred_unmap_info {
125 struct pci_dev *pdev;
126 dma_addr_t addr[MAX_SKB_FRAGS + 1];
127};
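/*
 * Illustrative usage (editor's sketch of a hypothetical caller): senders
 * that rely on deferred unmapping are expected to leave room for this
 * structure at the start of the skb's buffer, for example
 *
 *	skb = alloc_skb(sizeof(struct deferred_unmap_info) + wr_len, gfp);
 *	if (skb)
 *		skb_reserve(skb, sizeof(struct deferred_unmap_info));
 *
 * so that setup_deferred_unmapping() can store the DMA addresses at
 * skb->head without clobbering the work request.
 */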
128
129/*
130 * Maps a number of flits to the number of Tx descriptors that can hold them.
131 * The formula is
132 *
133 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
134 *
135 * HW allows up to 4 descriptors to be combined into a WR.
136 */
137static u8 flit_desc_map[] = {
138 0,
139#if SGE_NUM_GENBITS == 1
140 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
144#elif SGE_NUM_GENBITS == 2
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
147 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
148 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
149#else
150# error "SGE_NUM_GENBITS must be 1 or 2"
151#endif
152};
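/*
 * Editor's illustrative check (not driver code): the tables above can be
 * regenerated from the formula in the preceding comment, e.g.
 *
 *	unsigned int flits_to_desc_calc(unsigned int flits)
 *	{
 *		return flits <= 1 ? flits : 1 + (flits - 2) / (WR_FLITS - 1);
 *	}
 *
 * should satisfy flit_desc_map[n] == flits_to_desc_calc(n) for all valid n.
 */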
153
154static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
155{
156 return container_of(q, struct sge_qset, fl[qidx]);
157}
158
159static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
160{
161 return container_of(q, struct sge_qset, rspq);
162}
163
164static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
165{
166 return container_of(q, struct sge_qset, txq[qidx]);
167}
168
169/**
170 * refill_rspq - replenish an SGE response queue
171 * @adapter: the adapter
172 * @q: the response queue to replenish
173 * @credits: how many new responses to make available
174 *
175 * Replenishes a response queue by making the supplied number of responses
176 * available to HW.
177 */
178static inline void refill_rspq(struct adapter *adapter,
179 const struct sge_rspq *q, unsigned int credits)
180{
afefce66 181 rmb();
182 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
183 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
184}
185
186/**
187 * need_skb_unmap - does the platform need unmapping of sk_buffs?
188 *
 189 * Returns true if the platform needs sk_buff unmapping. The compiler
 190 * optimizes away unnecessary code if this returns true.
191 */
192static inline int need_skb_unmap(void)
193{
194 /*
 195 * This structure is used to tell if the platform needs buffer
196 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
197 */
198 struct dummy {
199 DECLARE_PCI_UNMAP_ADDR(addr);
200 };
201
202 return sizeof(struct dummy) != 0;
203}
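/*
 * Editor's note (illustrative): on platforms that need unmapping,
 * DECLARE_PCI_UNMAP_ADDR(addr) expands to a "dma_addr_t addr;" member, so
 * sizeof(struct dummy) != 0; on platforms that don't, it expands to nothing
 * and this function becomes a compile-time constant 0, so code such as
 *
 *	if (need_skb_unmap())
 *		unmap_skb(skb, q, cidx, pdev);
 *
 * is eliminated entirely by the compiler.
 */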
204
205/**
206 * unmap_skb - unmap a packet main body and its page fragments
207 * @skb: the packet
208 * @q: the Tx queue containing Tx descriptors for the packet
209 * @cidx: index of Tx descriptor
210 * @pdev: the PCI device
211 *
212 * Unmap the main body of an sk_buff and its page fragments, if any.
213 * Because of the fairly complicated structure of our SGLs and the desire
214 * to conserve space for metadata, the information necessary to unmap an
215 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
216 * descriptors (the physical addresses of the various data buffers), and
217 * the SW descriptor state (assorted indices). The send functions
218 * initialize the indices for the first packet descriptor so we can unmap
219 * the buffers held in the first Tx descriptor here, and we have enough
220 * information at this point to set the state for the next Tx descriptor.
221 *
222 * Note that it is possible to clean up the first descriptor of a packet
223 * before the send routines have written the next descriptors, but this
224 * race does not cause any problem. We just end up writing the unmapping
225 * info for the descriptor first.
4d22de3e
DLR
226 */
227static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
228 unsigned int cidx, struct pci_dev *pdev)
229{
230 const struct sg_ent *sgp;
23561c94
DLR
231 struct tx_sw_desc *d = &q->sdesc[cidx];
232 int nfrags, frag_idx, curflit, j = d->addr_idx;
4d22de3e 233
23561c94
DLR
234 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
235 frag_idx = d->fragidx;
4d22de3e 236
23561c94
DLR
237 if (frag_idx == 0 && skb_headlen(skb)) {
238 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
239 skb_headlen(skb), PCI_DMA_TODEVICE);
4d22de3e
DLR
240 j = 1;
241 }
242
23561c94 243 curflit = d->sflit + 1 + j;
4d22de3e
DLR
244 nfrags = skb_shinfo(skb)->nr_frags;
245
246 while (frag_idx < nfrags && curflit < WR_FLITS) {
247 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
248 skb_shinfo(skb)->frags[frag_idx].size,
249 PCI_DMA_TODEVICE);
250 j ^= 1;
251 if (j == 0) {
252 sgp++;
253 curflit++;
254 }
255 curflit++;
256 frag_idx++;
257 }
258
23561c94
DLR
259 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
260 d = cidx + 1 == q->size ? q->sdesc : d + 1;
261 d->fragidx = frag_idx;
262 d->addr_idx = j;
263 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
4d22de3e
DLR
264 }
265}
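/*
 * Editor's layout sketch: each struct sg_ent packs two buffers into three
 * flits, one flit holding the two 32-bit lengths and two flits holding the
 * two 64-bit addresses:
 *
 *	flit 0:	len[0] | len[1]
 *	flit 1:	addr[0]
 *	flit 2:	addr[1]
 *
 * which is why the loop above toggles j between 0 and 1 and advances sgp
 * (consuming an extra length flit) only on every second buffer.
 */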
266
267/**
268 * free_tx_desc - reclaims Tx descriptors and their buffers
269 * @adapter: the adapter
270 * @q: the Tx queue to reclaim descriptors from
271 * @n: the number of descriptors to reclaim
272 *
273 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
274 * Tx buffers. Called with the Tx queue lock held.
275 */
276static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
277 unsigned int n)
278{
279 struct tx_sw_desc *d;
280 struct pci_dev *pdev = adapter->pdev;
281 unsigned int cidx = q->cidx;
282
99d7cf30
DLR
283 const int need_unmap = need_skb_unmap() &&
284 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
285
4d22de3e
DLR
286 d = &q->sdesc[cidx];
287 while (n--) {
288 if (d->skb) { /* an SGL is present */
99d7cf30 289 if (need_unmap)
4d22de3e 290 unmap_skb(d->skb, q, cidx, pdev);
23561c94 291 if (d->eop)
4d22de3e
DLR
292 kfree_skb(d->skb);
293 }
294 ++d;
295 if (++cidx == q->size) {
296 cidx = 0;
297 d = q->sdesc;
298 }
299 }
300 q->cidx = cidx;
301}
302
303/**
304 * reclaim_completed_tx - reclaims completed Tx descriptors
305 * @adapter: the adapter
306 * @q: the Tx queue to reclaim completed descriptors from
307 *
308 * Reclaims Tx descriptors that the SGE has indicated it has processed,
309 * and frees the associated buffers if possible. Called with the Tx
310 * queue's lock held.
311 */
312static inline void reclaim_completed_tx(struct adapter *adapter,
313 struct sge_txq *q)
314{
315 unsigned int reclaim = q->processed - q->cleaned;
316
317 if (reclaim) {
318 free_tx_desc(adapter, q, reclaim);
319 q->cleaned += reclaim;
320 q->in_use -= reclaim;
321 }
322}
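/*
 * Editor's worked example: q->processed counts descriptors the SGE has
 * reported complete and q->cleaned counts those already freed, so with
 * processed = 100 and cleaned = 60 this call frees 40 descriptors and
 * returns 40 entries to the queue (in_use -= 40). Both counters only ever
 * grow, so the unsigned subtraction stays correct across wraparound.
 */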
323
324/**
325 * should_restart_tx - are there enough resources to restart a Tx queue?
326 * @q: the Tx queue
327 *
328 * Checks if there are enough descriptors to restart a suspended Tx queue.
329 */
330static inline int should_restart_tx(const struct sge_txq *q)
331{
332 unsigned int r = q->processed - q->cleaned;
333
334 return q->in_use - r < (q->size >> 1);
335}
336
337/**
338 * free_rx_bufs - free the Rx buffers on an SGE free list
339 * @pdev: the PCI device associated with the adapter
340 * @rxq: the SGE free list to clean up
341 *
342 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
343 * this queue should be stopped before calling this function.
344 */
345static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
346{
347 unsigned int cidx = q->cidx;
348
349 while (q->credits--) {
350 struct rx_sw_desc *d = &q->sdesc[cidx];
351
352 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
353 q->buf_size, PCI_DMA_FROMDEVICE);
cf992af5 354 if (q->use_pages) {
20d3fc11
DLR
355 if (d->pg_chunk.page)
356 put_page(d->pg_chunk.page);
cf992af5 357 d->pg_chunk.page = NULL;
e0994eb1 358 } else {
cf992af5
DLR
359 kfree_skb(d->skb);
360 d->skb = NULL;
e0994eb1 361 }
4d22de3e
DLR
362 if (++cidx == q->size)
363 cidx = 0;
364 }
e0994eb1 365
cf992af5 366 if (q->pg_chunk.page) {
7385ecf3 367 __free_pages(q->pg_chunk.page, q->order);
cf992af5
DLR
368 q->pg_chunk.page = NULL;
369 }
4d22de3e
DLR
370}
371
372/**
373 * add_one_rx_buf - add a packet buffer to a free-buffer list
cf992af5 374 * @va: buffer start VA
4d22de3e
DLR
375 * @len: the buffer length
376 * @d: the HW Rx descriptor to write
377 * @sd: the SW Rx descriptor to write
378 * @gen: the generation bit value
379 * @pdev: the PCI device associated with the adapter
380 *
381 * Add a buffer of the given length to the supplied HW and SW Rx
382 * descriptors.
383 */
b1fb1f28
DLR
384static inline int add_one_rx_buf(void *va, unsigned int len,
385 struct rx_desc *d, struct rx_sw_desc *sd,
386 unsigned int gen, struct pci_dev *pdev)
4d22de3e
DLR
387{
388 dma_addr_t mapping;
389
e0994eb1 390 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
8d8bb39b 391 if (unlikely(pci_dma_mapping_error(pdev, mapping)))
b1fb1f28
DLR
392 return -ENOMEM;
393
4d22de3e
DLR
394 pci_unmap_addr_set(sd, dma_addr, mapping);
395
396 d->addr_lo = cpu_to_be32(mapping);
397 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
398 wmb();
399 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
400 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
b1fb1f28 401 return 0;
4d22de3e
DLR
402}
403
7385ecf3
DLR
404static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
405 unsigned int order)
cf992af5
DLR
406{
407 if (!q->pg_chunk.page) {
7385ecf3 408 q->pg_chunk.page = alloc_pages(gfp, order);
cf992af5
DLR
409 if (unlikely(!q->pg_chunk.page))
410 return -ENOMEM;
411 q->pg_chunk.va = page_address(q->pg_chunk.page);
412 q->pg_chunk.offset = 0;
413 }
414 sd->pg_chunk = q->pg_chunk;
415
416 q->pg_chunk.offset += q->buf_size;
7385ecf3 417 if (q->pg_chunk.offset == (PAGE_SIZE << order))
cf992af5
DLR
418 q->pg_chunk.page = NULL;
419 else {
420 q->pg_chunk.va += q->buf_size;
421 get_page(q->pg_chunk.page);
422 }
423 return 0;
424}
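/*
 * Editor's illustrative walk-through (assuming a 2KB chunk size on a
 * 4KB-page system): the first call allocates the page and hands out the
 * chunk at offset 0, taking an extra page reference; the second call hands
 * out offset 2048, sees the page is exhausted and drops q->pg_chunk.page
 * without another get_page(). The result is one reference per outstanding
 * chunk, so the page is freed only after every rx_sw_desc (and any skb
 * fragment built from it) has released its reference.
 */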
425
4d22de3e
DLR
426/**
427 * refill_fl - refill an SGE free-buffer list
428 * @adapter: the adapter
429 * @q: the free-list to refill
430 * @n: the number of new buffers to allocate
431 * @gfp: the gfp flags for allocating new buffers
432 *
433 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
 434 * allocated with the supplied gfp flags. The caller must ensure that
435 * @n does not exceed the queue's capacity.
436 */
b1fb1f28 437static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
4d22de3e 438{
cf992af5 439 void *buf_start;
4d22de3e
DLR
440 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
441 struct rx_desc *d = &q->desc[q->pidx];
b1fb1f28 442 unsigned int count = 0;
4d22de3e
DLR
443
444 while (n--) {
b1fb1f28
DLR
445 int err;
446
cf992af5 447 if (q->use_pages) {
7385ecf3 448 if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) {
cf992af5 449nomem: q->alloc_failed++;
e0994eb1
DLR
450 break;
451 }
cf992af5 452 buf_start = sd->pg_chunk.va;
e0994eb1 453 } else {
cf992af5 454 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
e0994eb1 455
cf992af5
DLR
456 if (!skb)
457 goto nomem;
e0994eb1 458
cf992af5
DLR
459 sd->skb = skb;
460 buf_start = skb->data;
e0994eb1
DLR
461 }
462
b1fb1f28
DLR
463 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
464 adap->pdev);
465 if (unlikely(err)) {
466 if (!q->use_pages) {
467 kfree_skb(sd->skb);
468 sd->skb = NULL;
469 }
470 break;
471 }
472
4d22de3e
DLR
473 d++;
474 sd++;
475 if (++q->pidx == q->size) {
476 q->pidx = 0;
477 q->gen ^= 1;
478 sd = q->sdesc;
479 d = q->desc;
480 }
481 q->credits++;
b1fb1f28 482 count++;
4d22de3e 483 }
afefce66 484 wmb();
b1fb1f28
DLR
485 if (likely(count))
486 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
487
488 return count;
4d22de3e
DLR
489}
490
491static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
492{
7385ecf3
DLR
493 refill_fl(adap, fl, min(16U, fl->size - fl->credits),
494 GFP_ATOMIC | __GFP_COMP);
4d22de3e
DLR
495}
496
497/**
498 * recycle_rx_buf - recycle a receive buffer
499 * @adapter: the adapter
500 * @q: the SGE free list
501 * @idx: index of buffer to recycle
502 *
503 * Recycles the specified buffer on the given free list by adding it at
504 * the next available slot on the list.
505 */
506static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
507 unsigned int idx)
508{
509 struct rx_desc *from = &q->desc[idx];
510 struct rx_desc *to = &q->desc[q->pidx];
511
cf992af5 512 q->sdesc[q->pidx] = q->sdesc[idx];
4d22de3e
DLR
513 to->addr_lo = from->addr_lo; /* already big endian */
514 to->addr_hi = from->addr_hi; /* likewise */
515 wmb();
516 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
517 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
518 q->credits++;
519
520 if (++q->pidx == q->size) {
521 q->pidx = 0;
522 q->gen ^= 1;
523 }
524 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
525}
526
527/**
528 * alloc_ring - allocate resources for an SGE descriptor ring
529 * @pdev: the PCI device
530 * @nelem: the number of descriptors
531 * @elem_size: the size of each descriptor
532 * @sw_size: the size of the SW state associated with each ring element
533 * @phys: the physical address of the allocated ring
534 * @metadata: address of the array holding the SW state for the ring
535 *
536 * Allocates resources for an SGE descriptor ring, such as Tx queues,
537 * free buffer lists, or response queues. Each SGE ring requires
538 * space for its HW descriptors plus, optionally, space for the SW state
539 * associated with each HW entry (the metadata). The function returns
540 * three values: the virtual address for the HW ring (the return value
541 * of the function), the physical address of the HW ring, and the address
542 * of the SW ring.
543 */
544static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
e0994eb1 545 size_t sw_size, dma_addr_t * phys, void *metadata)
4d22de3e
DLR
546{
547 size_t len = nelem * elem_size;
548 void *s = NULL;
549 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
550
551 if (!p)
552 return NULL;
52565544 553 if (sw_size && metadata) {
4d22de3e
DLR
554 s = kcalloc(nelem, sw_size, GFP_KERNEL);
555
556 if (!s) {
557 dma_free_coherent(&pdev->dev, len, p, *phys);
558 return NULL;
559 }
4d22de3e 560 *(void **)metadata = s;
52565544 561 }
4d22de3e
DLR
562 memset(p, 0, len);
563 return p;
564}
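/*
 * Illustrative usage (editor's sketch of a caller; the field names are the
 * ones used by the free-list code in this file):
 *
 *	fl->desc = alloc_ring(adap->pdev, fl->size, sizeof(struct rx_desc),
 *			      sizeof(struct rx_sw_desc), &fl->phys_addr,
 *			      &fl->sdesc);
 *
 * fl->desc then holds the zeroed coherent HW ring, fl->phys_addr its bus
 * address, and fl->sdesc the kcalloc()ed SW descriptor array.
 */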
565
204e2f98
DLR
566/**
 567 * t3_reset_qset - reset an SGE queue set
568 * @q: the queue set
569 *
 570 * Reset the qset structure. The NAPI structure is preserved in
 571 * the event of the qset's reincarnation, for example during EEH
 572 * recovery.
573 */
574static void t3_reset_qset(struct sge_qset *q)
575{
576 if (q->adap &&
577 !(q->adap->flags & NAPI_INIT)) {
578 memset(q, 0, sizeof(*q));
579 return;
580 }
581
582 q->adap = NULL;
583 memset(&q->rspq, 0, sizeof(q->rspq));
584 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
585 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
586 q->txq_stopped = 0;
20d3fc11 587 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
7be2df45 588 q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0;
204e2f98
DLR
589}
590
591
4d22de3e
DLR
592/**
593 * free_qset - free the resources of an SGE queue set
594 * @adapter: the adapter owning the queue set
595 * @q: the queue set
596 *
597 * Release the HW and SW resources associated with an SGE queue set, such
598 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
599 * queue set must be quiesced prior to calling this.
600 */
9265fabf 601static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
4d22de3e
DLR
602{
603 int i;
604 struct pci_dev *pdev = adapter->pdev;
605
4d22de3e
DLR
606 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
607 if (q->fl[i].desc) {
b1186dee 608 spin_lock_irq(&adapter->sge.reg_lock);
4d22de3e 609 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
b1186dee 610 spin_unlock_irq(&adapter->sge.reg_lock);
4d22de3e
DLR
611 free_rx_bufs(pdev, &q->fl[i]);
612 kfree(q->fl[i].sdesc);
613 dma_free_coherent(&pdev->dev,
614 q->fl[i].size *
615 sizeof(struct rx_desc), q->fl[i].desc,
616 q->fl[i].phys_addr);
617 }
618
619 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
620 if (q->txq[i].desc) {
b1186dee 621 spin_lock_irq(&adapter->sge.reg_lock);
4d22de3e 622 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
b1186dee 623 spin_unlock_irq(&adapter->sge.reg_lock);
4d22de3e
DLR
624 if (q->txq[i].sdesc) {
625 free_tx_desc(adapter, &q->txq[i],
626 q->txq[i].in_use);
627 kfree(q->txq[i].sdesc);
628 }
629 dma_free_coherent(&pdev->dev,
630 q->txq[i].size *
631 sizeof(struct tx_desc),
632 q->txq[i].desc, q->txq[i].phys_addr);
633 __skb_queue_purge(&q->txq[i].sendq);
634 }
635
636 if (q->rspq.desc) {
b1186dee 637 spin_lock_irq(&adapter->sge.reg_lock);
4d22de3e 638 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
b1186dee 639 spin_unlock_irq(&adapter->sge.reg_lock);
4d22de3e
DLR
640 dma_free_coherent(&pdev->dev,
641 q->rspq.size * sizeof(struct rsp_desc),
642 q->rspq.desc, q->rspq.phys_addr);
643 }
644
204e2f98 645 t3_reset_qset(q);
4d22de3e
DLR
646}
647
648/**
649 * init_qset_cntxt - initialize an SGE queue set context info
650 * @qs: the queue set
651 * @id: the queue set id
652 *
653 * Initializes the TIDs and context ids for the queues of a queue set.
654 */
655static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
656{
657 qs->rspq.cntxt_id = id;
658 qs->fl[0].cntxt_id = 2 * id;
659 qs->fl[1].cntxt_id = 2 * id + 1;
660 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
661 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
662 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
663 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
664 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
665}
666
667/**
668 * sgl_len - calculates the size of an SGL of the given capacity
669 * @n: the number of SGL entries
670 *
671 * Calculates the number of flits needed for a scatter/gather list that
672 * can hold the given number of entries.
673 */
674static inline unsigned int sgl_len(unsigned int n)
675{
676 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
677 return (3 * n) / 2 + (n & 1);
678}
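/*
 * Editor's worked example: two buffers share one SGL entry (one length
 * flit plus two address flits), so sgl_len(1) = 2, sgl_len(2) = 3,
 * sgl_len(3) = 5 and sgl_len(4) = 6, i.e. three flits per pair plus two
 * for a trailing odd buffer.
 */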
679
680/**
681 * flits_to_desc - returns the num of Tx descriptors for the given flits
682 * @n: the number of flits
683 *
684 * Calculates the number of Tx descriptors needed for the supplied number
685 * of flits.
686 */
687static inline unsigned int flits_to_desc(unsigned int n)
688{
689 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
690 return flit_desc_map[n];
691}
692
cf992af5
DLR
693/**
694 * get_packet - return the next ingress packet buffer from a free list
695 * @adap: the adapter that received the packet
696 * @fl: the SGE free list holding the packet
697 * @len: the packet length including any SGE padding
698 * @drop_thres: # of remaining buffers before we start dropping packets
699 *
700 * Get the next packet from a free list and complete setup of the
701 * sk_buff. If the packet is small we make a copy and recycle the
702 * original buffer, otherwise we use the original buffer itself. If a
703 * positive drop threshold is supplied packets are dropped and their
704 * buffers recycled if (a) the number of remaining buffers is under the
705 * threshold and the packet is too big to copy, or (b) the packet should
706 * be copied but there is no memory for the copy.
707 */
708static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
709 unsigned int len, unsigned int drop_thres)
710{
711 struct sk_buff *skb = NULL;
712 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
713
714 prefetch(sd->skb->data);
715 fl->credits--;
716
717 if (len <= SGE_RX_COPY_THRES) {
718 skb = alloc_skb(len, GFP_ATOMIC);
719 if (likely(skb != NULL)) {
720 __skb_put(skb, len);
721 pci_dma_sync_single_for_cpu(adap->pdev,
722 pci_unmap_addr(sd, dma_addr), len,
723 PCI_DMA_FROMDEVICE);
724 memcpy(skb->data, sd->skb->data, len);
725 pci_dma_sync_single_for_device(adap->pdev,
726 pci_unmap_addr(sd, dma_addr), len,
727 PCI_DMA_FROMDEVICE);
728 } else if (!drop_thres)
729 goto use_orig_buf;
730recycle:
731 recycle_rx_buf(adap, fl, fl->cidx);
732 return skb;
733 }
734
735 if (unlikely(fl->credits < drop_thres))
736 goto recycle;
737
738use_orig_buf:
739 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
740 fl->buf_size, PCI_DMA_FROMDEVICE);
741 skb = sd->skb;
742 skb_put(skb, len);
743 __refill_fl(adap, fl);
744 return skb;
745}
746
747/**
748 * get_packet_pg - return the next ingress packet buffer from a free list
749 * @adap: the adapter that received the packet
750 * @fl: the SGE free list holding the packet
751 * @len: the packet length including any SGE padding
752 * @drop_thres: # of remaining buffers before we start dropping packets
753 *
754 * Get the next packet from a free list populated with page chunks.
755 * If the packet is small we make a copy and recycle the original buffer,
756 * otherwise we attach the original buffer as a page fragment to a fresh
757 * sk_buff. If a positive drop threshold is supplied packets are dropped
758 * and their buffers recycled if (a) the number of remaining buffers is
759 * under the threshold and the packet is too big to copy, or (b) there's
760 * no system memory.
761 *
762 * Note: this function is similar to @get_packet but deals with Rx buffers
763 * that are page chunks rather than sk_buffs.
764 */
765static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
7385ecf3
DLR
766 struct sge_rspq *q, unsigned int len,
767 unsigned int drop_thres)
cf992af5 768{
7385ecf3 769 struct sk_buff *newskb, *skb;
cf992af5
DLR
770 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
771
7385ecf3
DLR
772 newskb = skb = q->pg_skb;
773
774 if (!skb && (len <= SGE_RX_COPY_THRES)) {
775 newskb = alloc_skb(len, GFP_ATOMIC);
776 if (likely(newskb != NULL)) {
777 __skb_put(newskb, len);
cf992af5
DLR
778 pci_dma_sync_single_for_cpu(adap->pdev,
779 pci_unmap_addr(sd, dma_addr), len,
780 PCI_DMA_FROMDEVICE);
7385ecf3 781 memcpy(newskb->data, sd->pg_chunk.va, len);
cf992af5
DLR
782 pci_dma_sync_single_for_device(adap->pdev,
783 pci_unmap_addr(sd, dma_addr), len,
784 PCI_DMA_FROMDEVICE);
785 } else if (!drop_thres)
786 return NULL;
787recycle:
788 fl->credits--;
789 recycle_rx_buf(adap, fl, fl->cidx);
7385ecf3
DLR
790 q->rx_recycle_buf++;
791 return newskb;
cf992af5
DLR
792 }
793
7385ecf3 794 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
cf992af5
DLR
795 goto recycle;
796
7385ecf3 797 if (!skb)
b47385bd 798 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
7385ecf3 799 if (unlikely(!newskb)) {
cf992af5
DLR
800 if (!drop_thres)
801 return NULL;
802 goto recycle;
803 }
804
805 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
806 fl->buf_size, PCI_DMA_FROMDEVICE);
7385ecf3
DLR
807 if (!skb) {
808 __skb_put(newskb, SGE_RX_PULL_LEN);
809 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
810 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
811 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
812 len - SGE_RX_PULL_LEN);
813 newskb->len = len;
814 newskb->data_len = len - SGE_RX_PULL_LEN;
815 } else {
816 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
817 sd->pg_chunk.page,
818 sd->pg_chunk.offset, len);
819 newskb->len += len;
820 newskb->data_len += len;
821 }
822 newskb->truesize += newskb->data_len;
cf992af5
DLR
823
824 fl->credits--;
825 /*
826 * We do not refill FLs here, we let the caller do it to overlap a
827 * prefetch.
828 */
7385ecf3 829 return newskb;
cf992af5
DLR
830}
831
4d22de3e
DLR
832/**
833 * get_imm_packet - return the next ingress packet buffer from a response
834 * @resp: the response descriptor containing the packet data
835 *
836 * Return a packet containing the immediate data of the given response.
837 */
838static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
839{
840 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
841
842 if (skb) {
843 __skb_put(skb, IMMED_PKT_SIZE);
27d7ff46 844 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
4d22de3e
DLR
845 }
846 return skb;
847}
848
849/**
850 * calc_tx_descs - calculate the number of Tx descriptors for a packet
851 * @skb: the packet
852 *
853 * Returns the number of Tx descriptors needed for the given Ethernet
854 * packet. Ethernet packets require addition of WR and CPL headers.
855 */
856static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
857{
858 unsigned int flits;
859
860 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
861 return 1;
862
863 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
864 if (skb_shinfo(skb)->gso_size)
865 flits++;
866 return flits_to_desc(flits);
867}
868
869/**
870 * make_sgl - populate a scatter/gather list for a packet
871 * @skb: the packet
872 * @sgp: the SGL to populate
873 * @start: start address of skb main body data to include in the SGL
874 * @len: length of skb main body data to include in the SGL
875 * @pdev: the PCI device
876 *
877 * Generates a scatter/gather list for the buffers that make up a packet
878 * and returns the SGL size in 8-byte words. The caller must size the SGL
879 * appropriately.
880 */
881static inline unsigned int make_sgl(const struct sk_buff *skb,
882 struct sg_ent *sgp, unsigned char *start,
883 unsigned int len, struct pci_dev *pdev)
884{
885 dma_addr_t mapping;
886 unsigned int i, j = 0, nfrags;
887
888 if (len) {
889 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
890 sgp->len[0] = cpu_to_be32(len);
891 sgp->addr[0] = cpu_to_be64(mapping);
892 j = 1;
893 }
894
895 nfrags = skb_shinfo(skb)->nr_frags;
896 for (i = 0; i < nfrags; i++) {
897 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
898
899 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
900 frag->size, PCI_DMA_TODEVICE);
901 sgp->len[j] = cpu_to_be32(frag->size);
902 sgp->addr[j] = cpu_to_be64(mapping);
903 j ^= 1;
904 if (j == 0)
905 ++sgp;
906 }
907 if (j)
908 sgp->len[j] = 0;
909 return ((nfrags + (len != 0)) * 3) / 2 + j;
910}
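/*
 * Editor's note: the value returned above equals sgl_len(nfrags + (len != 0)),
 * i.e. the flit count for one SGL entry per pair of buffers; callers add it
 * to the WR header flits before handing both to write_wr_hdr_sgl().
 */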
911
912/**
913 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
914 * @adap: the adapter
915 * @q: the Tx queue
916 *
 917 * Ring the doorbell if a Tx queue is asleep. There is a natural race:
 918 * the HW may go to sleep just after we check, but in that case the
 919 * interrupt handler will detect the outstanding TX packet and ring
 920 * the doorbell for us.
921 *
922 * When GTS is disabled we unconditionally ring the doorbell.
923 */
924static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
925{
926#if USE_GTS
927 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
928 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
929 set_bit(TXQ_LAST_PKT_DB, &q->flags);
930 t3_write_reg(adap, A_SG_KDOORBELL,
931 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
932 }
933#else
934 wmb(); /* write descriptors before telling HW */
935 t3_write_reg(adap, A_SG_KDOORBELL,
936 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
937#endif
938}
939
940static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
941{
942#if SGE_NUM_GENBITS == 2
943 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
944#endif
945}
946
947/**
948 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
949 * @ndesc: number of Tx descriptors spanned by the SGL
950 * @skb: the packet corresponding to the WR
951 * @d: first Tx descriptor to be written
952 * @pidx: index of above descriptors
953 * @q: the SGE Tx queue
954 * @sgl: the SGL
955 * @flits: number of flits to the start of the SGL in the first descriptor
956 * @sgl_flits: the SGL size in flits
957 * @gen: the Tx descriptor generation
958 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
959 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
960 *
961 * Write a work request header and an associated SGL. If the SGL is
962 * small enough to fit into one Tx descriptor it has already been written
963 * and we just need to write the WR header. Otherwise we distribute the
964 * SGL across the number of descriptors it spans.
965 */
966static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
967 struct tx_desc *d, unsigned int pidx,
968 const struct sge_txq *q,
969 const struct sg_ent *sgl,
970 unsigned int flits, unsigned int sgl_flits,
fb8e4444
AV
971 unsigned int gen, __be32 wr_hi,
972 __be32 wr_lo)
4d22de3e
DLR
973{
974 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
975 struct tx_sw_desc *sd = &q->sdesc[pidx];
976
977 sd->skb = skb;
978 if (need_skb_unmap()) {
23561c94
DLR
979 sd->fragidx = 0;
980 sd->addr_idx = 0;
981 sd->sflit = flits;
4d22de3e
DLR
982 }
983
984 if (likely(ndesc == 1)) {
23561c94 985 sd->eop = 1;
4d22de3e
DLR
986 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
987 V_WR_SGLSFLT(flits)) | wr_hi;
988 wmb();
989 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
990 V_WR_GEN(gen)) | wr_lo;
991 wr_gen2(d, gen);
992 } else {
993 unsigned int ogen = gen;
994 const u64 *fp = (const u64 *)sgl;
995 struct work_request_hdr *wp = wrp;
996
997 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
998 V_WR_SGLSFLT(flits)) | wr_hi;
999
1000 while (sgl_flits) {
1001 unsigned int avail = WR_FLITS - flits;
1002
1003 if (avail > sgl_flits)
1004 avail = sgl_flits;
1005 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1006 sgl_flits -= avail;
1007 ndesc--;
1008 if (!sgl_flits)
1009 break;
1010
1011 fp += avail;
1012 d++;
23561c94 1013 sd->eop = 0;
4d22de3e
DLR
1014 sd++;
1015 if (++pidx == q->size) {
1016 pidx = 0;
1017 gen ^= 1;
1018 d = q->desc;
1019 sd = q->sdesc;
1020 }
1021
1022 sd->skb = skb;
1023 wrp = (struct work_request_hdr *)d;
1024 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1025 V_WR_SGLSFLT(1)) | wr_hi;
1026 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1027 sgl_flits + 1)) |
1028 V_WR_GEN(gen)) | wr_lo;
1029 wr_gen2(d, gen);
1030 flits = 1;
1031 }
23561c94 1032 sd->eop = 1;
4d22de3e
DLR
1033 wrp->wr_hi |= htonl(F_WR_EOP);
1034 wmb();
1035 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1036 wr_gen2((struct tx_desc *)wp, ogen);
1037 WARN_ON(ndesc != 0);
1038 }
1039}
1040
1041/**
1042 * write_tx_pkt_wr - write a TX_PKT work request
1043 * @adap: the adapter
1044 * @skb: the packet to send
1045 * @pi: the egress interface
1046 * @pidx: index of the first Tx descriptor to write
1047 * @gen: the generation value to use
1048 * @q: the Tx queue
1049 * @ndesc: number of descriptors the packet will occupy
1050 * @compl: the value of the COMPL bit to use
1051 *
1052 * Generate a TX_PKT work request to send the supplied packet.
1053 */
1054static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1055 const struct port_info *pi,
1056 unsigned int pidx, unsigned int gen,
1057 struct sge_txq *q, unsigned int ndesc,
1058 unsigned int compl)
1059{
1060 unsigned int flits, sgl_flits, cntrl, tso_info;
1061 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1062 struct tx_desc *d = &q->desc[pidx];
1063 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1064
1065 cpl->len = htonl(skb->len | 0x80000000);
1066 cntrl = V_TXPKT_INTF(pi->port_id);
1067
1068 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1069 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1070
1071 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1072 if (tso_info) {
1073 int eth_type;
1074 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1075
1076 d->flit[2] = 0;
1077 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1078 hdr->cntrl = htonl(cntrl);
bbe735e4 1079 eth_type = skb_network_offset(skb) == ETH_HLEN ?
4d22de3e
DLR
1080 CPL_ETH_II : CPL_ETH_II_VLAN;
1081 tso_info |= V_LSO_ETH_TYPE(eth_type) |
eddc9ec5 1082 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
aa8223c7 1083 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
4d22de3e
DLR
1084 hdr->lso_info = htonl(tso_info);
1085 flits = 3;
1086 } else {
1087 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1088 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1089 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1090 cpl->cntrl = htonl(cntrl);
1091
1092 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1093 q->sdesc[pidx].skb = NULL;
1094 if (!skb->data_len)
d626f62b
ACM
1095 skb_copy_from_linear_data(skb, &d->flit[2],
1096 skb->len);
4d22de3e
DLR
1097 else
1098 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1099
1100 flits = (skb->len + 7) / 8 + 2;
1101 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1102 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1103 | F_WR_SOP | F_WR_EOP | compl);
1104 wmb();
1105 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1106 V_WR_TID(q->token));
1107 wr_gen2(d, gen);
1108 kfree_skb(skb);
1109 return;
1110 }
1111
1112 flits = 2;
1113 }
1114
1115 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1116 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
4d22de3e
DLR
1117
1118 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1119 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1120 htonl(V_WR_TID(q->token)));
1121}
1122
82ad3329
DLR
1123static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1124 struct sge_qset *qs, struct sge_txq *q)
a8cc21f6 1125{
82ad3329 1126 netif_tx_stop_queue(txq);
a8cc21f6
KK
1127 set_bit(TXQ_ETH, &qs->txq_stopped);
1128 q->stops++;
1129}
1130
4d22de3e
DLR
1131/**
1132 * eth_xmit - add a packet to the Ethernet Tx queue
1133 * @skb: the packet
1134 * @dev: the egress net device
1135 *
1136 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1137 */
1138int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1139{
82ad3329 1140 int qidx;
4d22de3e
DLR
1141 unsigned int ndesc, pidx, credits, gen, compl;
1142 const struct port_info *pi = netdev_priv(dev);
5fbf816f 1143 struct adapter *adap = pi->adapter;
82ad3329
DLR
1144 struct netdev_queue *txq;
1145 struct sge_qset *qs;
1146 struct sge_txq *q;
4d22de3e
DLR
1147
1148 /*
 1149 * The chip's minimum packet length is 9 octets, but play it safe and
 1150 * reject anything shorter than an Ethernet header.
1151 */
1152 if (unlikely(skb->len < ETH_HLEN)) {
1153 dev_kfree_skb(skb);
1154 return NETDEV_TX_OK;
1155 }
1156
82ad3329
DLR
1157 qidx = skb_get_queue_mapping(skb);
1158 qs = &pi->qs[qidx];
1159 q = &qs->txq[TXQ_ETH];
1160 txq = netdev_get_tx_queue(dev, qidx);
1161
4d22de3e
DLR
1162 spin_lock(&q->lock);
1163 reclaim_completed_tx(adap, q);
1164
1165 credits = q->size - q->in_use;
1166 ndesc = calc_tx_descs(skb);
1167
1168 if (unlikely(credits < ndesc)) {
82ad3329 1169 t3_stop_tx_queue(txq, qs, q);
a8cc21f6
KK
1170 dev_err(&adap->pdev->dev,
1171 "%s: Tx ring %u full while queue awake!\n",
1172 dev->name, q->cntxt_id & 7);
4d22de3e
DLR
1173 spin_unlock(&q->lock);
1174 return NETDEV_TX_BUSY;
1175 }
1176
1177 q->in_use += ndesc;
cd7e9034 1178 if (unlikely(credits - ndesc < q->stop_thres)) {
82ad3329 1179 t3_stop_tx_queue(txq, qs, q);
cd7e9034
DLR
1180
1181 if (should_restart_tx(q) &&
1182 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1183 q->restarts++;
82ad3329 1184 netif_tx_wake_queue(txq);
cd7e9034
DLR
1185 }
1186 }
4d22de3e
DLR
1187
1188 gen = q->gen;
1189 q->unacked += ndesc;
1190 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1191 q->unacked &= 7;
1192 pidx = q->pidx;
1193 q->pidx += ndesc;
1194 if (q->pidx >= q->size) {
1195 q->pidx -= q->size;
1196 q->gen ^= 1;
1197 }
1198
1199 /* update port statistics */
1200 if (skb->ip_summed == CHECKSUM_COMPLETE)
1201 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1202 if (skb_shinfo(skb)->gso_size)
1203 qs->port_stats[SGE_PSTAT_TSO]++;
1204 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1205 qs->port_stats[SGE_PSTAT_VLANINS]++;
1206
1207 dev->trans_start = jiffies;
1208 spin_unlock(&q->lock);
1209
1210 /*
1211 * We do not use Tx completion interrupts to free DMAd Tx packets.
 1212 * This is good for performance but means that we rely on new Tx
1213 * packets arriving to run the destructors of completed packets,
1214 * which open up space in their sockets' send queues. Sometimes
1215 * we do not get such new packets causing Tx to stall. A single
1216 * UDP transmitter is a good example of this situation. We have
1217 * a clean up timer that periodically reclaims completed packets
1218 * but it doesn't run often enough (nor do we want it to) to prevent
1219 * lengthy stalls. A solution to this problem is to run the
1220 * destructor early, after the packet is queued but before it's DMAd.
 1221 * A downside is that we lie to socket memory accounting, but the amount
1222 * of extra memory is reasonable (limited by the number of Tx
1223 * descriptors), the packets do actually get freed quickly by new
1224 * packets almost always, and for protocols like TCP that wait for
1225 * acks to really free up the data the extra memory is even less.
1226 * On the positive side we run the destructors on the sending CPU
1227 * rather than on a potentially different completing CPU, usually a
1228 * good thing. We also run them without holding our Tx queue lock,
1229 * unlike what reclaim_completed_tx() would otherwise do.
1230 *
1231 * Run the destructor before telling the DMA engine about the packet
1232 * to make sure it doesn't complete and get freed prematurely.
1233 */
1234 if (likely(!skb_shared(skb)))
1235 skb_orphan(skb);
1236
1237 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1238 check_ring_tx_db(adap, q);
1239 return NETDEV_TX_OK;
1240}
1241
1242/**
1243 * write_imm - write a packet into a Tx descriptor as immediate data
1244 * @d: the Tx descriptor to write
1245 * @skb: the packet
1246 * @len: the length of packet data to write as immediate data
1247 * @gen: the generation bit value to write
1248 *
1249 * Writes a packet as immediate data into a Tx descriptor. The packet
1250 * contains a work request at its beginning. We must write the packet
27186dc3
DLR
1251 * carefully so the SGE doesn't read it accidentally before it's written
1252 * in its entirety.
4d22de3e
DLR
1253 */
1254static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1255 unsigned int len, unsigned int gen)
1256{
1257 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1258 struct work_request_hdr *to = (struct work_request_hdr *)d;
1259
27186dc3
DLR
1260 if (likely(!skb->data_len))
1261 memcpy(&to[1], &from[1], len - sizeof(*from));
1262 else
1263 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1264
4d22de3e
DLR
1265 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1266 V_WR_BCNTLFLT(len & 7));
1267 wmb();
1268 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1269 V_WR_LEN((len + 7) / 8));
1270 wr_gen2(d, gen);
1271 kfree_skb(skb);
1272}
1273
1274/**
1275 * check_desc_avail - check descriptor availability on a send queue
1276 * @adap: the adapter
1277 * @q: the send queue
1278 * @skb: the packet needing the descriptors
1279 * @ndesc: the number of Tx descriptors needed
1280 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1281 *
1282 * Checks if the requested number of Tx descriptors is available on an
1283 * SGE send queue. If the queue is already suspended or not enough
1284 * descriptors are available the packet is queued for later transmission.
1285 * Must be called with the Tx queue locked.
1286 *
1287 * Returns 0 if enough descriptors are available, 1 if there aren't
1288 * enough descriptors and the packet has been queued, and 2 if the caller
1289 * needs to retry because there weren't enough descriptors at the
1290 * beginning of the call but some freed up in the mean time.
1291 */
1292static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1293 struct sk_buff *skb, unsigned int ndesc,
1294 unsigned int qid)
1295{
1296 if (unlikely(!skb_queue_empty(&q->sendq))) {
1297 addq_exit:__skb_queue_tail(&q->sendq, skb);
1298 return 1;
1299 }
1300 if (unlikely(q->size - q->in_use < ndesc)) {
1301 struct sge_qset *qs = txq_to_qset(q, qid);
1302
1303 set_bit(qid, &qs->txq_stopped);
1304 smp_mb__after_clear_bit();
1305
1306 if (should_restart_tx(q) &&
1307 test_and_clear_bit(qid, &qs->txq_stopped))
1308 return 2;
1309
1310 q->stops++;
1311 goto addq_exit;
1312 }
1313 return 0;
1314}
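/*
 * Illustrative caller pattern (editor's sketch; ctrl_xmit() and ofld_xmit()
 * in this file follow it): reclaim, check, then queue, retry, or send.
 *
 *	again:	reclaim_completed_tx(adap, q);
 *		ret = check_desc_avail(adap, q, skb, ndesc, qid);
 *		if (ret == 1)
 *			return NET_XMIT_CN;	(packet was queued)
 *		if (ret == 2)
 *			goto again;		(descriptors freed up meanwhile)
 *		ret == 0: write the work request and advance pidx
 */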
1315
1316/**
1317 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1318 * @q: the SGE control Tx queue
1319 *
1320 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1321 * that send only immediate data (presently just the control queues) and
1322 * thus do not have any sk_buffs to release.
1323 */
1324static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1325{
1326 unsigned int reclaim = q->processed - q->cleaned;
1327
1328 q->in_use -= reclaim;
1329 q->cleaned += reclaim;
1330}
1331
1332static inline int immediate(const struct sk_buff *skb)
1333{
27186dc3 1334 return skb->len <= WR_LEN;
4d22de3e
DLR
1335}
1336
1337/**
1338 * ctrl_xmit - send a packet through an SGE control Tx queue
1339 * @adap: the adapter
1340 * @q: the control queue
1341 * @skb: the packet
1342 *
1343 * Send a packet through an SGE control Tx queue. Packets sent through
1344 * a control queue must fit entirely as immediate data in a single Tx
1345 * descriptor and have no page fragments.
1346 */
1347static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1348 struct sk_buff *skb)
1349{
1350 int ret;
1351 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1352
1353 if (unlikely(!immediate(skb))) {
1354 WARN_ON(1);
1355 dev_kfree_skb(skb);
1356 return NET_XMIT_SUCCESS;
1357 }
1358
1359 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1360 wrp->wr_lo = htonl(V_WR_TID(q->token));
1361
1362 spin_lock(&q->lock);
1363 again:reclaim_completed_tx_imm(q);
1364
1365 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1366 if (unlikely(ret)) {
1367 if (ret == 1) {
1368 spin_unlock(&q->lock);
1369 return NET_XMIT_CN;
1370 }
1371 goto again;
1372 }
1373
1374 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1375
1376 q->in_use++;
1377 if (++q->pidx >= q->size) {
1378 q->pidx = 0;
1379 q->gen ^= 1;
1380 }
1381 spin_unlock(&q->lock);
1382 wmb();
1383 t3_write_reg(adap, A_SG_KDOORBELL,
1384 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1385 return NET_XMIT_SUCCESS;
1386}
1387
1388/**
1389 * restart_ctrlq - restart a suspended control queue
 1390 * @qs: the queue set containing the control queue
1391 *
1392 * Resumes transmission on a suspended Tx control queue.
1393 */
1394static void restart_ctrlq(unsigned long data)
1395{
1396 struct sk_buff *skb;
1397 struct sge_qset *qs = (struct sge_qset *)data;
1398 struct sge_txq *q = &qs->txq[TXQ_CTRL];
4d22de3e
DLR
1399
1400 spin_lock(&q->lock);
1401 again:reclaim_completed_tx_imm(q);
1402
bea3348e
SH
1403 while (q->in_use < q->size &&
1404 (skb = __skb_dequeue(&q->sendq)) != NULL) {
4d22de3e
DLR
1405
1406 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1407
1408 if (++q->pidx >= q->size) {
1409 q->pidx = 0;
1410 q->gen ^= 1;
1411 }
1412 q->in_use++;
1413 }
1414
1415 if (!skb_queue_empty(&q->sendq)) {
1416 set_bit(TXQ_CTRL, &qs->txq_stopped);
1417 smp_mb__after_clear_bit();
1418
1419 if (should_restart_tx(q) &&
1420 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1421 goto again;
1422 q->stops++;
1423 }
1424
1425 spin_unlock(&q->lock);
afefce66 1426 wmb();
bea3348e 1427 t3_write_reg(qs->adap, A_SG_KDOORBELL,
4d22de3e
DLR
1428 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1429}
1430
14ab9892
DLR
1431/*
1432 * Send a management message through control queue 0
1433 */
1434int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1435{
204e2f98 1436 int ret;
bc4b6b52
DLR
1437 local_bh_disable();
1438 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1439 local_bh_enable();
1440
1441 return ret;
14ab9892
DLR
1442}
1443
99d7cf30
DLR
1444/**
1445 * deferred_unmap_destructor - unmap a packet when it is freed
1446 * @skb: the packet
1447 *
1448 * This is the packet destructor used for Tx packets that need to remain
1449 * mapped until they are freed rather than until their Tx descriptors are
1450 * freed.
1451 */
1452static void deferred_unmap_destructor(struct sk_buff *skb)
1453{
1454 int i;
1455 const dma_addr_t *p;
1456 const struct skb_shared_info *si;
1457 const struct deferred_unmap_info *dui;
99d7cf30
DLR
1458
1459 dui = (struct deferred_unmap_info *)skb->head;
1460 p = dui->addr;
1461
23561c94
DLR
1462 if (skb->tail - skb->transport_header)
1463 pci_unmap_single(dui->pdev, *p++,
1464 skb->tail - skb->transport_header,
1465 PCI_DMA_TODEVICE);
99d7cf30
DLR
1466
1467 si = skb_shinfo(skb);
1468 for (i = 0; i < si->nr_frags; i++)
1469 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1470 PCI_DMA_TODEVICE);
1471}
1472
1473static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1474 const struct sg_ent *sgl, int sgl_flits)
1475{
1476 dma_addr_t *p;
1477 struct deferred_unmap_info *dui;
1478
1479 dui = (struct deferred_unmap_info *)skb->head;
1480 dui->pdev = pdev;
1481 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1482 *p++ = be64_to_cpu(sgl->addr[0]);
1483 *p++ = be64_to_cpu(sgl->addr[1]);
1484 }
1485 if (sgl_flits)
1486 *p = be64_to_cpu(sgl->addr[0]);
1487}
1488
4d22de3e
DLR
1489/**
1490 * write_ofld_wr - write an offload work request
1491 * @adap: the adapter
1492 * @skb: the packet to send
1493 * @q: the Tx queue
1494 * @pidx: index of the first Tx descriptor to write
1495 * @gen: the generation value to use
1496 * @ndesc: number of descriptors the packet will occupy
1497 *
1498 * Write an offload work request to send the supplied packet. The packet
1499 * data already carry the work request with most fields populated.
1500 */
1501static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1502 struct sge_txq *q, unsigned int pidx,
1503 unsigned int gen, unsigned int ndesc)
1504{
1505 unsigned int sgl_flits, flits;
1506 struct work_request_hdr *from;
1507 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1508 struct tx_desc *d = &q->desc[pidx];
1509
1510 if (immediate(skb)) {
1511 q->sdesc[pidx].skb = NULL;
1512 write_imm(d, skb, skb->len, gen);
1513 return;
1514 }
1515
1516 /* Only TX_DATA builds SGLs */
1517
1518 from = (struct work_request_hdr *)skb->data;
ea2ae17d
ACM
1519 memcpy(&d->flit[1], &from[1],
1520 skb_transport_offset(skb) - sizeof(*from));
4d22de3e 1521
ea2ae17d 1522 flits = skb_transport_offset(skb) / 8;
4d22de3e 1523 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
9c70220b 1524 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
27a884dc 1525 skb->tail - skb->transport_header,
4d22de3e 1526 adap->pdev);
99d7cf30
DLR
1527 if (need_skb_unmap()) {
1528 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1529 skb->destructor = deferred_unmap_destructor;
99d7cf30 1530 }
4d22de3e
DLR
1531
1532 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1533 gen, from->wr_hi, from->wr_lo);
1534}
1535
1536/**
1537 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1538 * @skb: the packet
1539 *
1540 * Returns the number of Tx descriptors needed for the given offload
1541 * packet. These packets are already fully constructed.
1542 */
1543static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1544{
27186dc3 1545 unsigned int flits, cnt;
4d22de3e 1546
27186dc3 1547 if (skb->len <= WR_LEN)
4d22de3e
DLR
1548 return 1; /* packet fits as immediate data */
1549
ea2ae17d 1550 flits = skb_transport_offset(skb) / 8; /* headers */
27186dc3 1551 cnt = skb_shinfo(skb)->nr_frags;
27a884dc 1552 if (skb->tail != skb->transport_header)
4d22de3e
DLR
1553 cnt++;
1554 return flits_to_desc(flits + sgl_len(cnt));
1555}
1556
1557/**
1558 * ofld_xmit - send a packet through an offload queue
1559 * @adap: the adapter
1560 * @q: the Tx offload queue
1561 * @skb: the packet
1562 *
1563 * Send an offload packet through an SGE offload queue.
1564 */
1565static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1566 struct sk_buff *skb)
1567{
1568 int ret;
1569 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1570
1571 spin_lock(&q->lock);
1572 again:reclaim_completed_tx(adap, q);
1573
1574 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1575 if (unlikely(ret)) {
1576 if (ret == 1) {
1577 skb->priority = ndesc; /* save for restart */
1578 spin_unlock(&q->lock);
1579 return NET_XMIT_CN;
1580 }
1581 goto again;
1582 }
1583
1584 gen = q->gen;
1585 q->in_use += ndesc;
1586 pidx = q->pidx;
1587 q->pidx += ndesc;
1588 if (q->pidx >= q->size) {
1589 q->pidx -= q->size;
1590 q->gen ^= 1;
1591 }
1592 spin_unlock(&q->lock);
1593
1594 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1595 check_ring_tx_db(adap, q);
1596 return NET_XMIT_SUCCESS;
1597}
1598
1599/**
1600 * restart_offloadq - restart a suspended offload queue
 1601 * @qs: the queue set containing the offload queue
1602 *
1603 * Resumes transmission on a suspended Tx offload queue.
1604 */
1605static void restart_offloadq(unsigned long data)
1606{
1607 struct sk_buff *skb;
1608 struct sge_qset *qs = (struct sge_qset *)data;
1609 struct sge_txq *q = &qs->txq[TXQ_OFLD];
5fbf816f
DLR
1610 const struct port_info *pi = netdev_priv(qs->netdev);
1611 struct adapter *adap = pi->adapter;
4d22de3e
DLR
1612
1613 spin_lock(&q->lock);
1614 again:reclaim_completed_tx(adap, q);
1615
1616 while ((skb = skb_peek(&q->sendq)) != NULL) {
1617 unsigned int gen, pidx;
1618 unsigned int ndesc = skb->priority;
1619
1620 if (unlikely(q->size - q->in_use < ndesc)) {
1621 set_bit(TXQ_OFLD, &qs->txq_stopped);
1622 smp_mb__after_clear_bit();
1623
1624 if (should_restart_tx(q) &&
1625 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1626 goto again;
1627 q->stops++;
1628 break;
1629 }
1630
1631 gen = q->gen;
1632 q->in_use += ndesc;
1633 pidx = q->pidx;
1634 q->pidx += ndesc;
1635 if (q->pidx >= q->size) {
1636 q->pidx -= q->size;
1637 q->gen ^= 1;
1638 }
1639 __skb_unlink(skb, &q->sendq);
1640 spin_unlock(&q->lock);
1641
1642 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1643 spin_lock(&q->lock);
1644 }
1645 spin_unlock(&q->lock);
1646
1647#if USE_GTS
1648 set_bit(TXQ_RUNNING, &q->flags);
1649 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1650#endif
afefce66 1651 wmb();
4d22de3e
DLR
1652 t3_write_reg(adap, A_SG_KDOORBELL,
1653 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1654}
1655
1656/**
1657 * queue_set - return the queue set a packet should use
1658 * @skb: the packet
1659 *
1660 * Maps a packet to the SGE queue set it should use. The desired queue
1661 * set is carried in bits 1-3 in the packet's priority.
1662 */
1663static inline int queue_set(const struct sk_buff *skb)
1664{
1665 return skb->priority >> 1;
1666}
1667
1668/**
1669 * is_ctrl_pkt - return whether an offload packet is a control packet
1670 * @skb: the packet
1671 *
1672 * Determines whether an offload packet should use an OFLD or a CTRL
1673 * Tx queue. This is indicated by bit 0 in the packet's priority.
1674 */
1675static inline int is_ctrl_pkt(const struct sk_buff *skb)
1676{
1677 return skb->priority & 1;
1678}
1679
1680/**
1681 * t3_offload_tx - send an offload packet
1682 * @tdev: the offload device to send to
1683 * @skb: the packet
1684 *
1685 * Sends an offload packet. We use the packet priority to select the
1686 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1687 * should be sent as regular or control, bits 1-3 select the queue set.
1688 */
1689int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1690{
1691 struct adapter *adap = tdev2adap(tdev);
1692 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1693
1694 if (unlikely(is_ctrl_pkt(skb)))
1695 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1696
1697 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1698}
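/*
 * Editor's illustrative encoding: a sender targeting queue set 2 would use
 * skb->priority = (2 << 1) to hit that set's OFLD queue, or
 * skb->priority = (2 << 1) | 1 for its CTRL queue, matching queue_set()
 * and is_ctrl_pkt() above.
 */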
1699
1700/**
1701 * offload_enqueue - add an offload packet to an SGE offload receive queue
1702 * @q: the SGE response queue
1703 * @skb: the packet
1704 *
1705 * Add a new offload packet to an SGE response queue's offload packet
1706 * queue. If the packet is the first on the queue it schedules the RX
1707 * softirq to process the queue.
1708 */
1709static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1710{
147e70e6
DM
1711 int was_empty = skb_queue_empty(&q->rx_queue);
1712
1713 __skb_queue_tail(&q->rx_queue, skb);
1714
1715 if (was_empty) {
4d22de3e
DLR
1716 struct sge_qset *qs = rspq_to_qset(q);
1717
bea3348e 1718 napi_schedule(&qs->napi);
4d22de3e 1719 }
4d22de3e
DLR
1720}
1721
1722/**
1723 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1724 * @tdev: the offload device that will be receiving the packets
1725 * @q: the SGE response queue that assembled the bundle
1726 * @skbs: the partial bundle
1727 * @n: the number of packets in the bundle
1728 *
1729 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1730 */
1731static inline void deliver_partial_bundle(struct t3cdev *tdev,
1732 struct sge_rspq *q,
1733 struct sk_buff *skbs[], int n)
1734{
1735 if (n) {
1736 q->offload_bundles++;
1737 tdev->recv(tdev, skbs, n);
1738 }
1739}
1740
1741/**
1742 * ofld_poll - NAPI handler for offload packets in interrupt mode
1743 * @dev: the network device doing the polling
1744 * @budget: polling budget
1745 *
1746 * The NAPI handler for offload packets when a response queue is serviced
1747 * by the hard interrupt handler, i.e., when it's operating in non-polling
1748 * mode. Creates small packet batches and sends them through the offload
1749 * receive handler. Batches need to be of modest size as we do prefetches
1750 * on the packets in each.
1751 */
bea3348e 1752static int ofld_poll(struct napi_struct *napi, int budget)
4d22de3e 1753{
bea3348e 1754 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
4d22de3e 1755 struct sge_rspq *q = &qs->rspq;
bea3348e
SH
1756 struct adapter *adapter = qs->adap;
1757 int work_done = 0;
4d22de3e 1758
bea3348e 1759 while (work_done < budget) {
147e70e6
DM
1760 struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1761 struct sk_buff_head queue;
4d22de3e
DLR
1762 int ngathered;
1763
1764 spin_lock_irq(&q->lock);
147e70e6
DM
1765 __skb_queue_head_init(&queue);
1766 skb_queue_splice_init(&q->rx_queue, &queue);
1767 if (skb_queue_empty(&queue)) {
bea3348e 1768 napi_complete(napi);
4d22de3e 1769 spin_unlock_irq(&q->lock);
bea3348e 1770 return work_done;
4d22de3e 1771 }
4d22de3e
DLR
1772 spin_unlock_irq(&q->lock);
1773
147e70e6
DM
1774 ngathered = 0;
1775 skb_queue_walk_safe(&queue, skb, tmp) {
1776 if (work_done >= budget)
1777 break;
1778 work_done++;
1779
1780 __skb_unlink(skb, &queue);
1781 prefetch(skb->data);
1782 skbs[ngathered] = skb;
4d22de3e
DLR
1783 if (++ngathered == RX_BUNDLE_SIZE) {
1784 q->offload_bundles++;
1785 adapter->tdev.recv(&adapter->tdev, skbs,
1786 ngathered);
1787 ngathered = 0;
1788 }
1789 }
147e70e6
DM
1790 if (!skb_queue_empty(&queue)) {
1791 /* splice remaining packets back onto Rx queue */
4d22de3e 1792 spin_lock_irq(&q->lock);
147e70e6 1793 skb_queue_splice(&queue, &q->rx_queue);
4d22de3e
DLR
1794 spin_unlock_irq(&q->lock);
1795 }
1796 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1797 }
bea3348e
SH
1798
1799 return work_done;
4d22de3e
DLR
1800}
1801
1802/**
1803 * rx_offload - process a received offload packet
1804 * @tdev: the offload device receiving the packet
1805 * @rq: the response queue that received the packet
1806 * @skb: the packet
1807 * @rx_gather: a gather list of packets if we are building a bundle
1808 * @gather_idx: index of the next available slot in the bundle
1809 *
 1810 * Process an ingress offload packet and add it to the offload ingress
1811 * queue. Returns the index of the next available slot in the bundle.
1812 */
1813static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1814 struct sk_buff *skb, struct sk_buff *rx_gather[],
1815 unsigned int gather_idx)
1816{
459a98ed 1817 skb_reset_mac_header(skb);
c1d2bbe1 1818 skb_reset_network_header(skb);
badff6d0 1819 skb_reset_transport_header(skb);
4d22de3e
DLR
1820
1821 if (rq->polling) {
1822 rx_gather[gather_idx++] = skb;
1823 if (gather_idx == RX_BUNDLE_SIZE) {
1824 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1825 gather_idx = 0;
1826 rq->offload_bundles++;
1827 }
1828 } else
1829 offload_enqueue(rq, skb);
1830
1831 return gather_idx;
1832}
1833
4d22de3e
DLR
1834/**
1835 * restart_tx - check whether to restart suspended Tx queues
1836 * @qs: the queue set to resume
1837 *
1838 * Restarts suspended Tx queues of an SGE queue set if they have enough
1839 * free resources to resume operation.
1840 */
1841static void restart_tx(struct sge_qset *qs)
1842{
1843 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1844 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1845 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1846 qs->txq[TXQ_ETH].restarts++;
1847 if (netif_running(qs->netdev))
82ad3329 1848 netif_tx_wake_queue(qs->tx_q);
4d22de3e
DLR
1849 }
1850
1851 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1852 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1853 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1854 qs->txq[TXQ_OFLD].restarts++;
1855 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1856 }
1857 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1858 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1859 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1860 qs->txq[TXQ_CTRL].restarts++;
1861 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1862 }
1863}
1864
a109a5b9
KX
1865/**
1866 * cxgb3_arp_process - process an ARP request probing a private IP address
1867 * @adapter: the adapter
1868 * @skb: the skbuff containing the ARP request
1869 *
1870 * Check if the ARP request is probing the private IP address
1871 * dedicated to iSCSI, generate an ARP reply if so.
1872 */
1873static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
1874{
1875 struct net_device *dev = skb->dev;
1876 struct port_info *pi;
1877 struct arphdr *arp;
1878 unsigned char *arp_ptr;
1879 unsigned char *sha;
1880 __be32 sip, tip;
1881
1882 if (!dev)
1883 return;
1884
1885 skb_reset_network_header(skb);
1886 arp = arp_hdr(skb);
1887
1888 if (arp->ar_op != htons(ARPOP_REQUEST))
1889 return;
1890
1891 arp_ptr = (unsigned char *)(arp + 1);
1892 sha = arp_ptr;
1893 arp_ptr += dev->addr_len;
1894 memcpy(&sip, arp_ptr, sizeof(sip));
1895 arp_ptr += sizeof(sip);
1896 arp_ptr += dev->addr_len;
1897 memcpy(&tip, arp_ptr, sizeof(tip));
1898
1899 pi = netdev_priv(dev);
1900 if (tip != pi->iscsi_ipv4addr)
1901 return;
1902
1903 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1904 dev->dev_addr, sha);
1905
1906}
1907
1908static inline int is_arp(struct sk_buff *skb)
1909{
1910 return skb->protocol == htons(ETH_P_ARP);
1911}
1912
4d22de3e
DLR
1913/**
1914 * rx_eth - process an ingress ethernet packet
1915 * @adap: the adapter
1916 * @rq: the response queue that received the packet
1917 * @skb: the packet
1918 * @pad: amount of padding at the start of the buffer
1919 *
 1920 * Process an ingress ethernet packet and deliver it to the stack.
1921 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1922 * if it was immediate data in a response.
1923 */
1924static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
b47385bd 1925 struct sk_buff *skb, int pad, int lro)
4d22de3e
DLR
1926{
1927 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
b47385bd 1928 struct sge_qset *qs = rspq_to_qset(rq);
4d22de3e
DLR
1929 struct port_info *pi;
1930
4d22de3e 1931 skb_pull(skb, sizeof(*p) + pad);
4c13eb66 1932 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
4d22de3e 1933 pi = netdev_priv(skb->dev);
47fd23fe 1934 if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid && p->csum == htons(0xffff) &&
4d22de3e 1935 !p->fragment) {
a109a5b9 1936 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
4d22de3e
DLR
1937 skb->ip_summed = CHECKSUM_UNNECESSARY;
1938 } else
1939 skb->ip_summed = CHECKSUM_NONE;
1940
1941 if (unlikely(p->vlan_valid)) {
1942 struct vlan_group *grp = pi->vlan_grp;
1943
b47385bd 1944 qs->port_stats[SGE_PSTAT_VLANEX]++;
4d22de3e 1945 if (likely(grp))
b47385bd 1946 if (lro)
7be2df45
HX
1947 vlan_gro_receive(&qs->napi, grp,
1948 ntohs(p->vlan), skb);
a109a5b9
KX
1949 else {
1950 if (unlikely(pi->iscsi_ipv4addr &&
1951 is_arp(skb))) {
1952 unsigned short vtag = ntohs(p->vlan) &
1953 VLAN_VID_MASK;
1954 skb->dev = vlan_group_get_device(grp,
1955 vtag);
1956 cxgb3_arp_process(adap, skb);
1957 }
b47385bd
DLR
1958 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1959 rq->polling);
a109a5b9 1960 }
4d22de3e
DLR
1961 else
1962 dev_kfree_skb_any(skb);
b47385bd
DLR
1963 } else if (rq->polling) {
1964 if (lro)
7be2df45 1965 napi_gro_receive(&qs->napi, skb);
a109a5b9
KX
1966 else {
1967 if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
1968 cxgb3_arp_process(adap, skb);
b47385bd 1969 netif_receive_skb(skb);
a109a5b9 1970 }
b47385bd 1971 } else
4d22de3e
DLR
1972 netif_rx(skb);
1973}
1974
b47385bd
DLR
1975static inline int is_eth_tcp(u32 rss)
1976{
1977 return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
1978}
1979
b47385bd
DLR
1980/**
1981 * lro_add_page - add a page chunk to an LRO session
1982 * @adap: the adapter
1983 * @qs: the associated queue set
1984 * @fl: the free list containing the page chunk to add
1985 * @len: packet length
1986 * @complete: Indicates the last fragment of a frame
1987 *
1988 * Add a received packet contained in a page chunk to an existing LRO
1989 * session.
1990 */
1991static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
1992 struct sge_fl *fl, int len, int complete)
1993{
1994 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
1995 struct cpl_rx_pkt *cpl;
7be2df45
HX
1996 struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags;
1997 int nr_frags = qs->lro_frag_tbl.nr_frags;
1998 int frag_len = qs->lro_frag_tbl.len;
b47385bd
DLR
1999 int offset = 0;
2000
2001 if (!nr_frags) {
2002 offset = 2 + sizeof(struct cpl_rx_pkt);
2003 qs->lro_va = cpl = sd->pg_chunk.va + 2;
2004 }
2005
2006 fl->credits--;
2007
2008 len -= offset;
2009 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
2010 fl->buf_size, PCI_DMA_FROMDEVICE);
2011
2012 rx_frag += nr_frags;
2013 rx_frag->page = sd->pg_chunk.page;
2014 rx_frag->page_offset = sd->pg_chunk.offset + offset;
2015 rx_frag->size = len;
2016 frag_len += len;
7be2df45
HX
2017 qs->lro_frag_tbl.nr_frags++;
2018 qs->lro_frag_tbl.len = frag_len;
b47385bd
DLR
2019
2020 if (!complete)
2021 return;
2022
7be2df45 2023 qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
b47385bd
DLR
2024 cpl = qs->lro_va;
2025
2026 if (unlikely(cpl->vlan_valid)) {
2027 struct net_device *dev = qs->netdev;
2028 struct port_info *pi = netdev_priv(dev);
2029 struct vlan_group *grp = pi->vlan_grp;
2030
2031 if (likely(grp != NULL)) {
7be2df45
HX
2032 vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
2033 &qs->lro_frag_tbl);
2034 goto out;
b47385bd
DLR
2035 }
2036 }
7be2df45 2037 napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);
b47385bd 2038
7be2df45
HX
2039out:
2040 qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
b47385bd
DLR
2041}
2042
4d22de3e
DLR
2043/**
2044 * handle_rsp_cntrl_info - handles control information in a response
2045 * @qs: the queue set corresponding to the response
2046 * @flags: the response control flags
4d22de3e
DLR
2047 *
2048 * Handles the control information of an SGE response, such as GTS
2049 * indications and completion credits for the queue set's Tx queues.
6195c71d 2050 * HW coalesces credits; we don't do any extra SW coalescing.
4d22de3e 2051 */
6195c71d 2052static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
4d22de3e
DLR
2053{
2054 unsigned int credits;
2055
2056#if USE_GTS
2057 if (flags & F_RSPD_TXQ0_GTS)
2058 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2059#endif
2060
4d22de3e
DLR
2061 credits = G_RSPD_TXQ0_CR(flags);
2062 if (credits)
2063 qs->txq[TXQ_ETH].processed += credits;
2064
6195c71d
DLR
2065 credits = G_RSPD_TXQ2_CR(flags);
2066 if (credits)
2067 qs->txq[TXQ_CTRL].processed += credits;
2068
4d22de3e
DLR
2069# if USE_GTS
2070 if (flags & F_RSPD_TXQ1_GTS)
2071 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2072# endif
6195c71d
DLR
2073 credits = G_RSPD_TXQ1_CR(flags);
2074 if (credits)
2075 qs->txq[TXQ_OFLD].processed += credits;
4d22de3e
DLR
2076}
2077
2078/**
2079 * check_ring_db - check if we need to ring any doorbells
2080 * @adapter: the adapter
2081 * @qs: the queue set whose Tx queues are to be examined
2082 * @sleeping: indicates which Tx queue sent GTS
2083 *
2084 * Checks if some of a queue set's Tx queues need to ring their doorbells
2085 * to resume transmission after idling while they still have unprocessed
2086 * descriptors.
2087 */
2088static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2089 unsigned int sleeping)
2090{
2091 if (sleeping & F_RSPD_TXQ0_GTS) {
2092 struct sge_txq *txq = &qs->txq[TXQ_ETH];
2093
2094 if (txq->cleaned + txq->in_use != txq->processed &&
2095 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2096 set_bit(TXQ_RUNNING, &txq->flags);
2097 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2098 V_EGRCNTX(txq->cntxt_id));
2099 }
2100 }
2101
2102 if (sleeping & F_RSPD_TXQ1_GTS) {
2103 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2104
2105 if (txq->cleaned + txq->in_use != txq->processed &&
2106 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2107 set_bit(TXQ_RUNNING, &txq->flags);
2108 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2109 V_EGRCNTX(txq->cntxt_id));
2110 }
2111 }
2112}
2113
2114/**
2115 * is_new_response - check if a response is newly written
2116 * @r: the response descriptor
2117 * @q: the response queue
2118 *
2119 * Returns true if a response descriptor contains a yet unprocessed
2120 * response.
2121 */
2122static inline int is_new_response(const struct rsp_desc *r,
2123 const struct sge_rspq *q)
2124{
2125 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2126}
2127
7385ecf3
DLR
2128static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2129{
2130 q->pg_skb = NULL;
2131 q->rx_recycle_buf = 0;
2132}
2133
4d22de3e
DLR
2134#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2135#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2136 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2137 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2138 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2139
2140/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2141#define NOMEM_INTR_DELAY 2500
2142
2143/**
2144 * process_responses - process responses from an SGE response queue
2145 * @adap: the adapter
2146 * @qs: the queue set to which the response queue belongs
2147 * @budget: how many responses can be processed in this round
2148 *
2149 * Process responses from an SGE response queue up to the supplied budget.
2150 * Responses include received packets as well as credits and other events
2151 * for the queues that belong to the response queue's queue set.
2152 * A negative budget is effectively unlimited.
2153 *
2154 * Additionally choose the interrupt holdoff time for the next interrupt
2155 * on this queue. If the system is under memory shortage use a fairly
2156 * long delay to help recovery.
2157 */
2158static int process_responses(struct adapter *adap, struct sge_qset *qs,
2159 int budget)
2160{
2161 struct sge_rspq *q = &qs->rspq;
2162 struct rsp_desc *r = &q->desc[q->cidx];
2163 int budget_left = budget;
6195c71d 2164 unsigned int sleeping = 0;
4d22de3e
DLR
2165 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2166 int ngathered = 0;
2167
2168 q->next_holdoff = q->holdoff_tmr;
2169
2170 while (likely(budget_left && is_new_response(r, q))) {
b47385bd 2171 int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
4d22de3e
DLR
2172 struct sk_buff *skb = NULL;
2173 u32 len, flags = ntohl(r->flags);
7385ecf3
DLR
2174 __be32 rss_hi = *(const __be32 *)r,
2175 rss_lo = r->rss_hdr.rss_hash_val;
4d22de3e
DLR
2176
2177 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2178
2179 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2180 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2181 if (!skb)
2182 goto no_mem;
2183
2184 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2185 skb->data[0] = CPL_ASYNC_NOTIF;
2186 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2187 q->async_notif++;
2188 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2189 skb = get_imm_packet(r);
2190 if (unlikely(!skb)) {
cf992af5 2191no_mem:
4d22de3e
DLR
2192 q->next_holdoff = NOMEM_INTR_DELAY;
2193 q->nomem++;
2194 /* consume one credit since we tried */
2195 budget_left--;
2196 break;
2197 }
2198 q->imm_data++;
e0994eb1 2199 ethpad = 0;
4d22de3e 2200 } else if ((len = ntohl(r->len_cq)) != 0) {
cf992af5 2201 struct sge_fl *fl;
e0994eb1 2202
b47385bd
DLR
2203 if (eth)
2204 lro = qs->lro_enabled && is_eth_tcp(rss_hi);
2205
cf992af5
DLR
2206 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2207 if (fl->use_pages) {
2208 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
e0994eb1 2209
cf992af5
DLR
2210 prefetch(addr);
2211#if L1_CACHE_BYTES < 128
2212 prefetch(addr + L1_CACHE_BYTES);
2213#endif
e0994eb1 2214 __refill_fl(adap, fl);
b47385bd
DLR
2215 if (lro > 0) {
2216 lro_add_page(adap, qs, fl,
2217 G_RSPD_LEN(len),
2218 flags & F_RSPD_EOP);
2219 goto next_fl;
2220 }
e0994eb1 2221
7385ecf3
DLR
2222 skb = get_packet_pg(adap, fl, q,
2223 G_RSPD_LEN(len),
2224 eth ?
2225 SGE_RX_DROP_THRES : 0);
2226 q->pg_skb = skb;
cf992af5 2227 } else
e0994eb1
DLR
2228 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2229 eth ? SGE_RX_DROP_THRES : 0);
cf992af5
DLR
2230 if (unlikely(!skb)) {
2231 if (!eth)
2232 goto no_mem;
2233 q->rx_drops++;
2234 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2235 __skb_pull(skb, 2);
b47385bd 2236next_fl:
4d22de3e
DLR
2237 if (++fl->cidx == fl->size)
2238 fl->cidx = 0;
2239 } else
2240 q->pure_rsps++;
2241
2242 if (flags & RSPD_CTRL_MASK) {
2243 sleeping |= flags & RSPD_GTS_MASK;
6195c71d 2244 handle_rsp_cntrl_info(qs, flags);
4d22de3e
DLR
2245 }
2246
2247 r++;
2248 if (unlikely(++q->cidx == q->size)) {
2249 q->cidx = 0;
2250 q->gen ^= 1;
2251 r = q->desc;
2252 }
2253 prefetch(r);
2254
2255 if (++q->credits >= (q->size / 4)) {
2256 refill_rspq(adap, q, q->credits);
2257 q->credits = 0;
2258 }
2259
7385ecf3
DLR
2260 packet_complete = flags &
2261 (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2262 F_RSPD_ASYNC_NOTIF);
2263
2264 if (skb != NULL && packet_complete) {
4d22de3e 2265 if (eth)
b47385bd 2266 rx_eth(adap, q, skb, ethpad, lro);
4d22de3e 2267 else {
afefce66 2268 q->offload_pkts++;
cf992af5
DLR
2269 /* Preserve the RSS info in csum & priority */
2270 skb->csum = rss_hi;
2271 skb->priority = rss_lo;
2272 ngathered = rx_offload(&adap->tdev, q, skb,
2273 offload_skbs,
e0994eb1 2274 ngathered);
4d22de3e 2275 }
7385ecf3
DLR
2276
2277 if (flags & F_RSPD_EOP)
b47385bd 2278 clear_rspq_bufstate(q);
4d22de3e 2279 }
4d22de3e
DLR
2280 --budget_left;
2281 }
2282
4d22de3e 2283 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
b47385bd 2284
4d22de3e
DLR
2285 if (sleeping)
2286 check_ring_db(adap, qs, sleeping);
2287
2288 smp_mb(); /* commit Tx queue .processed updates */
2289 if (unlikely(qs->txq_stopped != 0))
2290 restart_tx(qs);
2291
2292 budget -= budget_left;
2293 return budget;
2294}
2295
2296static inline int is_pure_response(const struct rsp_desc *r)
2297{
c5419e6f 2298 __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
4d22de3e
DLR
2299
2300 return (n | r->len_cq) == 0;
2301}
2302
2303/**
2304 * napi_rx_handler - the NAPI handler for Rx processing
bea3348e 2305 * @napi: the napi instance
4d22de3e
DLR
2306 * @budget: how many packets we can process in this round
2307 *
2308 * Handler for new data events when using NAPI.
2309 */
bea3348e 2310static int napi_rx_handler(struct napi_struct *napi, int budget)
4d22de3e 2311{
bea3348e
SH
2312 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2313 struct adapter *adap = qs->adap;
2314 int work_done = process_responses(adap, qs, budget);
4d22de3e 2315
bea3348e
SH
2316 if (likely(work_done < budget)) {
2317 napi_complete(napi);
4d22de3e 2318
bea3348e
SH
2319 /*
2320 * Because we don't atomically flush the following
2321 * write it is possible that in very rare cases it can
2322 * reach the device in a way that races with a new
2323 * response being written plus an error interrupt
2324 * causing the NAPI interrupt handler below to return
2325 * unhandled status to the OS. To protect against
2326 * this would require flushing the write and doing
2327 * both the write and the flush with interrupts off.
2328 * Way too expensive and unjustifiable given the
2329 * rarity of the race.
2330 *
2331 * The race cannot happen at all with MSI-X.
2332 */
2333 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2334 V_NEWTIMER(qs->rspq.next_holdoff) |
2335 V_NEWINDEX(qs->rspq.cidx));
2336 }
2337 return work_done;
4d22de3e
DLR
2338}
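#if 0	/* Illustrative only, not built: a generic NAPI registration of this
	 * poll handler.  In this driver the poll callback is selected in
	 * t3_update_qset_coalesce() below and registration with the stack is
	 * handled by the main driver, so this merely shows the API shape.
	 */
	netif_napi_add(qs->netdev, &qs->napi, napi_rx_handler, 64);
#endif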
2339
2340/*
2341 * Returns true if the device is already scheduled for polling.
2342 */
bea3348e 2343static inline int napi_is_scheduled(struct napi_struct *napi)
4d22de3e 2344{
bea3348e 2345 return test_bit(NAPI_STATE_SCHED, &napi->state);
4d22de3e
DLR
2346}
2347
2348/**
2349 * process_pure_responses - process pure responses from a response queue
2350 * @adap: the adapter
2351 * @qs: the queue set owning the response queue
2352 * @r: the first pure response to process
2353 *
2354 * A simpler version of process_responses() that handles only pure (i.e.,
 2355 * non data-carrying) responses. Such responses are too lightweight to
2356 * justify calling a softirq under NAPI, so we handle them specially in
2357 * the interrupt handler. The function is called with a pointer to a
2358 * response, which the caller must ensure is a valid pure response.
2359 *
2360 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2361 */
2362static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2363 struct rsp_desc *r)
2364{
2365 struct sge_rspq *q = &qs->rspq;
6195c71d 2366 unsigned int sleeping = 0;
4d22de3e
DLR
2367
2368 do {
2369 u32 flags = ntohl(r->flags);
2370
2371 r++;
2372 if (unlikely(++q->cidx == q->size)) {
2373 q->cidx = 0;
2374 q->gen ^= 1;
2375 r = q->desc;
2376 }
2377 prefetch(r);
2378
2379 if (flags & RSPD_CTRL_MASK) {
2380 sleeping |= flags & RSPD_GTS_MASK;
6195c71d 2381 handle_rsp_cntrl_info(qs, flags);
4d22de3e
DLR
2382 }
2383
2384 q->pure_rsps++;
2385 if (++q->credits >= (q->size / 4)) {
2386 refill_rspq(adap, q, q->credits);
2387 q->credits = 0;
2388 }
2389 } while (is_new_response(r, q) && is_pure_response(r));
2390
4d22de3e
DLR
2391 if (sleeping)
2392 check_ring_db(adap, qs, sleeping);
2393
2394 smp_mb(); /* commit Tx queue .processed updates */
2395 if (unlikely(qs->txq_stopped != 0))
2396 restart_tx(qs);
2397
2398 return is_new_response(r, q);
2399}
2400
2401/**
2402 * handle_responses - decide what to do with new responses in NAPI mode
2403 * @adap: the adapter
2404 * @q: the response queue
2405 *
2406 * This is used by the NAPI interrupt handlers to decide what to do with
2407 * new SGE responses. If there are no new responses it returns -1. If
2408 * there are new responses and they are pure (i.e., non-data carrying)
2409 * it handles them straight in hard interrupt context as they are very
2410 * cheap and don't deliver any packets. Finally, if there are any data
2411 * signaling responses it schedules the NAPI handler. Returns 1 if it
2412 * schedules NAPI, 0 if all new responses were pure.
2413 *
2414 * The caller must ascertain NAPI is not already running.
2415 */
2416static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2417{
2418 struct sge_qset *qs = rspq_to_qset(q);
2419 struct rsp_desc *r = &q->desc[q->cidx];
2420
2421 if (!is_new_response(r, q))
2422 return -1;
2423 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2424 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2425 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2426 return 0;
2427 }
bea3348e 2428 napi_schedule(&qs->napi);
4d22de3e
DLR
2429 return 1;
2430}
2431
2432/*
2433 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2434 * (i.e., response queue serviced in hard interrupt).
2435 */
2436irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2437{
2438 struct sge_qset *qs = cookie;
bea3348e 2439 struct adapter *adap = qs->adap;
4d22de3e
DLR
2440 struct sge_rspq *q = &qs->rspq;
2441
2442 spin_lock(&q->lock);
2443 if (process_responses(adap, qs, -1) == 0)
2444 q->unhandled_irqs++;
2445 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2446 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2447 spin_unlock(&q->lock);
2448 return IRQ_HANDLED;
2449}
2450
2451/*
2452 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2453 * (i.e., response queue serviced by NAPI polling).
2454 */
9265fabf 2455static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
4d22de3e
DLR
2456{
2457 struct sge_qset *qs = cookie;
4d22de3e
DLR
2458 struct sge_rspq *q = &qs->rspq;
2459
2460 spin_lock(&q->lock);
4d22de3e 2461
bea3348e 2462 if (handle_responses(qs->adap, q) < 0)
4d22de3e
DLR
2463 q->unhandled_irqs++;
2464 spin_unlock(&q->lock);
2465 return IRQ_HANDLED;
2466}
2467
2468/*
2469 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2470 * SGE response queues as well as error and other async events as they all use
2471 * the same MSI vector. We use one SGE response queue per port in this mode
2472 * and protect all response queues with queue 0's lock.
2473 */
2474static irqreturn_t t3_intr_msi(int irq, void *cookie)
2475{
2476 int new_packets = 0;
2477 struct adapter *adap = cookie;
2478 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2479
2480 spin_lock(&q->lock);
2481
2482 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2483 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2484 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2485 new_packets = 1;
2486 }
2487
2488 if (adap->params.nports == 2 &&
2489 process_responses(adap, &adap->sge.qs[1], -1)) {
2490 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2491
2492 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2493 V_NEWTIMER(q1->next_holdoff) |
2494 V_NEWINDEX(q1->cidx));
2495 new_packets = 1;
2496 }
2497
2498 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2499 q->unhandled_irqs++;
2500
2501 spin_unlock(&q->lock);
2502 return IRQ_HANDLED;
2503}
2504
bea3348e 2505static int rspq_check_napi(struct sge_qset *qs)
4d22de3e 2506{
bea3348e
SH
2507 struct sge_rspq *q = &qs->rspq;
2508
2509 if (!napi_is_scheduled(&qs->napi) &&
2510 is_new_response(&q->desc[q->cidx], q)) {
2511 napi_schedule(&qs->napi);
4d22de3e
DLR
2512 return 1;
2513 }
2514 return 0;
2515}
2516
2517/*
2518 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2519 * by NAPI polling). Handles data events from SGE response queues as well as
2520 * error and other async events as they all use the same MSI vector. We use
2521 * one SGE response queue per port in this mode and protect all response
2522 * queues with queue 0's lock.
2523 */
9265fabf 2524static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
4d22de3e
DLR
2525{
2526 int new_packets;
2527 struct adapter *adap = cookie;
2528 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2529
2530 spin_lock(&q->lock);
2531
bea3348e 2532 new_packets = rspq_check_napi(&adap->sge.qs[0]);
4d22de3e 2533 if (adap->params.nports == 2)
bea3348e 2534 new_packets += rspq_check_napi(&adap->sge.qs[1]);
4d22de3e
DLR
2535 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2536 q->unhandled_irqs++;
2537
2538 spin_unlock(&q->lock);
2539 return IRQ_HANDLED;
2540}
2541
2542/*
2543 * A helper function that processes responses and issues GTS.
2544 */
2545static inline int process_responses_gts(struct adapter *adap,
2546 struct sge_rspq *rq)
2547{
2548 int work;
2549
2550 work = process_responses(adap, rspq_to_qset(rq), -1);
2551 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2552 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2553 return work;
2554}
2555
2556/*
2557 * The legacy INTx interrupt handler. This needs to handle data events from
2558 * SGE response queues as well as error and other async events as they all use
2559 * the same interrupt pin. We use one SGE response queue per port in this mode
2560 * and protect all response queues with queue 0's lock.
2561 */
2562static irqreturn_t t3_intr(int irq, void *cookie)
2563{
2564 int work_done, w0, w1;
2565 struct adapter *adap = cookie;
2566 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2567 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2568
2569 spin_lock(&q0->lock);
2570
2571 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2572 w1 = adap->params.nports == 2 &&
2573 is_new_response(&q1->desc[q1->cidx], q1);
2574
2575 if (likely(w0 | w1)) {
2576 t3_write_reg(adap, A_PL_CLI, 0);
2577 t3_read_reg(adap, A_PL_CLI); /* flush */
2578
2579 if (likely(w0))
2580 process_responses_gts(adap, q0);
2581
2582 if (w1)
2583 process_responses_gts(adap, q1);
2584
2585 work_done = w0 | w1;
2586 } else
2587 work_done = t3_slow_intr_handler(adap);
2588
2589 spin_unlock(&q0->lock);
2590 return IRQ_RETVAL(work_done != 0);
2591}
2592
2593/*
2594 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2595 * Handles data events from SGE response queues as well as error and other
2596 * async events as they all use the same interrupt pin. We use one SGE
2597 * response queue per port in this mode and protect all response queues with
2598 * queue 0's lock.
2599 */
2600static irqreturn_t t3b_intr(int irq, void *cookie)
2601{
2602 u32 map;
2603 struct adapter *adap = cookie;
2604 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2605
2606 t3_write_reg(adap, A_PL_CLI, 0);
2607 map = t3_read_reg(adap, A_SG_DATA_INTR);
2608
2609 if (unlikely(!map)) /* shared interrupt, most likely */
2610 return IRQ_NONE;
2611
2612 spin_lock(&q0->lock);
2613
2614 if (unlikely(map & F_ERRINTR))
2615 t3_slow_intr_handler(adap);
2616
2617 if (likely(map & 1))
2618 process_responses_gts(adap, q0);
2619
2620 if (map & 2)
2621 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2622
2623 spin_unlock(&q0->lock);
2624 return IRQ_HANDLED;
2625}
2626
2627/*
2628 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2629 * Handles data events from SGE response queues as well as error and other
2630 * async events as they all use the same interrupt pin. We use one SGE
2631 * response queue per port in this mode and protect all response queues with
2632 * queue 0's lock.
2633 */
2634static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2635{
2636 u32 map;
4d22de3e 2637 struct adapter *adap = cookie;
bea3348e
SH
2638 struct sge_qset *qs0 = &adap->sge.qs[0];
2639 struct sge_rspq *q0 = &qs0->rspq;
4d22de3e
DLR
2640
2641 t3_write_reg(adap, A_PL_CLI, 0);
2642 map = t3_read_reg(adap, A_SG_DATA_INTR);
2643
2644 if (unlikely(!map)) /* shared interrupt, most likely */
2645 return IRQ_NONE;
2646
2647 spin_lock(&q0->lock);
2648
2649 if (unlikely(map & F_ERRINTR))
2650 t3_slow_intr_handler(adap);
2651
bea3348e
SH
2652 if (likely(map & 1))
2653 napi_schedule(&qs0->napi);
4d22de3e 2654
bea3348e
SH
2655 if (map & 2)
2656 napi_schedule(&adap->sge.qs[1].napi);
4d22de3e
DLR
2657
2658 spin_unlock(&q0->lock);
2659 return IRQ_HANDLED;
2660}
2661
2662/**
2663 * t3_intr_handler - select the top-level interrupt handler
2664 * @adap: the adapter
2665 * @polling: whether using NAPI to service response queues
2666 *
2667 * Selects the top-level interrupt handler based on the type of interrupts
2668 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2669 * response queues.
2670 */
7c239975 2671irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
4d22de3e
DLR
2672{
2673 if (adap->flags & USING_MSIX)
2674 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2675 if (adap->flags & USING_MSI)
2676 return polling ? t3_intr_msi_napi : t3_intr_msi;
2677 if (adap->params.rev > 0)
2678 return polling ? t3b_intr_napi : t3b_intr;
2679 return t3_intr;
2680}
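#if 0	/* Illustrative only, not built: how a caller might hook up the
	 * handler selected above for the MSI/INTx case.  The adap->name
	 * field used for the IRQ label is an assumption of this sketch.
	 */
	int err = request_irq(adap->pdev->irq,
			      t3_intr_handler(adap,
					      adap->sge.qs[0].rspq.polling),
			      (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
			      adap->name, adap);
#endif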
2681
b881955b
DLR
2682#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2683 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2684 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2685 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2686 F_HIRCQPARITYERROR)
2687#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2688#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2689 F_RSPQDISABLED)
2690
4d22de3e
DLR
2691/**
2692 * t3_sge_err_intr_handler - SGE async event interrupt handler
2693 * @adapter: the adapter
2694 *
2695 * Interrupt handler for SGE asynchronous (non-data) events.
2696 */
2697void t3_sge_err_intr_handler(struct adapter *adapter)
2698{
2699 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2700
b881955b
DLR
2701 if (status & SGE_PARERR)
2702 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2703 status & SGE_PARERR);
2704 if (status & SGE_FRAMINGERR)
2705 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2706 status & SGE_FRAMINGERR);
2707
4d22de3e
DLR
2708 if (status & F_RSPQCREDITOVERFOW)
2709 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2710
2711 if (status & F_RSPQDISABLED) {
2712 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2713
2714 CH_ALERT(adapter,
2715 "packet delivered to disabled response queue "
2716 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2717 }
2718
6e3f03b7
DLR
2719 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2720 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
 2721 status & F_HIPIODRBDROPERR ? "high" : "low");
2722
4d22de3e 2723 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
b881955b 2724 if (status & SGE_FATALERR)
4d22de3e
DLR
2725 t3_fatal_err(adapter);
2726}
2727
2728/**
2729 * sge_timer_cb - perform periodic maintenance of an SGE qset
2730 * @data: the SGE queue set to maintain
2731 *
2732 * Runs periodically from a timer to perform maintenance of an SGE queue
2733 * set. It performs two tasks:
2734 *
2735 * a) Cleans up any completed Tx descriptors that may still be pending.
2736 * Normal descriptor cleanup happens when new packets are added to a Tx
2737 * queue so this timer is relatively infrequent and does any cleanup only
2738 * if the Tx queue has not seen any new packets in a while. We make a
2739 * best effort attempt to reclaim descriptors, in that we don't wait
2740 * around if we cannot get a queue's lock (which most likely is because
2741 * someone else is queueing new packets and so will also handle the clean
2742 * up). Since control queues use immediate data exclusively we don't
2743 * bother cleaning them up here.
2744 *
2745 * b) Replenishes Rx queues that have run out due to memory shortage.
2746 * Normally new Rx buffers are added when existing ones are consumed but
2747 * when out of memory a queue can become empty. We try to add only a few
2748 * buffers here, the queue will be replenished fully as these new buffers
2749 * are used up if memory shortage has subsided.
2750 */
2751static void sge_timer_cb(unsigned long data)
2752{
2753 spinlock_t *lock;
2754 struct sge_qset *qs = (struct sge_qset *)data;
bea3348e 2755 struct adapter *adap = qs->adap;
4d22de3e
DLR
2756
2757 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2758 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2759 spin_unlock(&qs->txq[TXQ_ETH].lock);
2760 }
2761 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2762 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2763 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2764 }
2765 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
bea3348e 2766 &adap->sge.qs[0].rspq.lock;
4d22de3e 2767 if (spin_trylock_irq(lock)) {
bea3348e 2768 if (!napi_is_scheduled(&qs->napi)) {
bae73f44
DLR
2769 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2770
4d22de3e
DLR
2771 if (qs->fl[0].credits < qs->fl[0].size)
2772 __refill_fl(adap, &qs->fl[0]);
2773 if (qs->fl[1].credits < qs->fl[1].size)
2774 __refill_fl(adap, &qs->fl[1]);
bae73f44
DLR
2775
2776 if (status & (1 << qs->rspq.cntxt_id)) {
2777 qs->rspq.starved++;
2778 if (qs->rspq.credits) {
2779 refill_rspq(adap, &qs->rspq, 1);
2780 qs->rspq.credits--;
2781 qs->rspq.restarted++;
e0994eb1 2782 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
bae73f44
DLR
2783 1 << qs->rspq.cntxt_id);
2784 }
2785 }
4d22de3e
DLR
2786 }
2787 spin_unlock_irq(lock);
2788 }
2789 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2790}
2791
2792/**
2793 * t3_update_qset_coalesce - update coalescing settings for a queue set
2794 * @qs: the SGE queue set
2795 * @p: new queue set parameters
2796 *
2797 * Update the coalescing settings for an SGE queue set. Nothing is done
2798 * if the queue set is not initialized yet.
2799 */
2800void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2801{
4d22de3e
DLR
2802 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2803 qs->rspq.polling = p->polling;
bea3348e 2804 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
4d22de3e
DLR
2805}
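#if 0	/* Illustrative only, not built: raising the interrupt holdoff of
	 * queue set 0 to roughly 10us.  The parameter layout follows the
	 * qset_params defaults prepared in t3_sge_prep() below; the
	 * adap->params.sge.qset[] path is an assumption of this sketch.
	 */
	struct qset_params qp = adap->params.sge.qset[0];

	qp.coalesce_usecs = 10;
	t3_update_qset_coalesce(&adap->sge.qs[0], &qp);
#endif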
2806
2807/**
2808 * t3_sge_alloc_qset - initialize an SGE queue set
2809 * @adapter: the adapter
2810 * @id: the queue set id
2811 * @nports: how many Ethernet ports will be using this queue set
2812 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2813 * @p: configuration parameters for this queue set
2814 * @ntxq: number of Tx queues for the queue set
2815 * @netdev: net device associated with this queue set
82ad3329 2816 * @netdevq: net device TX queue associated with this queue set
4d22de3e
DLR
2817 *
2818 * Allocate resources and initialize an SGE queue set. A queue set
2819 * comprises a response queue, two Rx free-buffer queues, and up to 3
2820 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2821 * queue, offload queue, and control queue.
2822 */
2823int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2824 int irq_vec_idx, const struct qset_params *p,
82ad3329
DLR
2825 int ntxq, struct net_device *dev,
2826 struct netdev_queue *netdevq)
4d22de3e 2827{
b1fb1f28 2828 int i, avail, ret = -ENOMEM;
4d22de3e
DLR
2829 struct sge_qset *q = &adapter->sge.qs[id];
2830
2831 init_qset_cntxt(q, id);
20d3fc11 2832 setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
4d22de3e
DLR
2833
2834 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2835 sizeof(struct rx_desc),
2836 sizeof(struct rx_sw_desc),
2837 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2838 if (!q->fl[0].desc)
2839 goto err;
2840
2841 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2842 sizeof(struct rx_desc),
2843 sizeof(struct rx_sw_desc),
2844 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2845 if (!q->fl[1].desc)
2846 goto err;
2847
2848 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2849 sizeof(struct rsp_desc), 0,
2850 &q->rspq.phys_addr, NULL);
2851 if (!q->rspq.desc)
2852 goto err;
2853
2854 for (i = 0; i < ntxq; ++i) {
2855 /*
2856 * The control queue always uses immediate data so does not
2857 * need to keep track of any sk_buffs.
2858 */
2859 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2860
2861 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2862 sizeof(struct tx_desc), sz,
2863 &q->txq[i].phys_addr,
2864 &q->txq[i].sdesc);
2865 if (!q->txq[i].desc)
2866 goto err;
2867
2868 q->txq[i].gen = 1;
2869 q->txq[i].size = p->txq_size[i];
2870 spin_lock_init(&q->txq[i].lock);
2871 skb_queue_head_init(&q->txq[i].sendq);
2872 }
2873
2874 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2875 (unsigned long)q);
2876 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2877 (unsigned long)q);
2878
2879 q->fl[0].gen = q->fl[1].gen = 1;
2880 q->fl[0].size = p->fl_size;
2881 q->fl[1].size = p->jumbo_size;
2882
2883 q->rspq.gen = 1;
2884 q->rspq.size = p->rspq_size;
2885 spin_lock_init(&q->rspq.lock);
147e70e6 2886 skb_queue_head_init(&q->rspq.rx_queue);
4d22de3e
DLR
2887
2888 q->txq[TXQ_ETH].stop_thres = nports *
2889 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2890
cf992af5
DLR
2891#if FL0_PG_CHUNK_SIZE > 0
2892 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
e0994eb1 2893#else
cf992af5 2894 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
e0994eb1 2895#endif
7385ecf3
DLR
2896#if FL1_PG_CHUNK_SIZE > 0
2897 q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
2898#else
cf992af5
DLR
2899 q->fl[1].buf_size = is_offload(adapter) ?
2900 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2901 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
7385ecf3
DLR
2902#endif
2903
2904 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2905 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
2906 q->fl[0].order = FL0_PG_ORDER;
2907 q->fl[1].order = FL1_PG_ORDER;
4d22de3e 2908
b1186dee 2909 spin_lock_irq(&adapter->sge.reg_lock);
4d22de3e
DLR
2910
2911 /* FL threshold comparison uses < */
2912 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2913 q->rspq.phys_addr, q->rspq.size,
2914 q->fl[0].buf_size, 1, 0);
2915 if (ret)
2916 goto err_unlock;
2917
2918 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2919 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2920 q->fl[i].phys_addr, q->fl[i].size,
2921 q->fl[i].buf_size, p->cong_thres, 1,
2922 0);
2923 if (ret)
2924 goto err_unlock;
2925 }
2926
2927 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2928 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2929 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2930 1, 0);
2931 if (ret)
2932 goto err_unlock;
2933
2934 if (ntxq > 1) {
2935 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2936 USE_GTS, SGE_CNTXT_OFLD, id,
2937 q->txq[TXQ_OFLD].phys_addr,
2938 q->txq[TXQ_OFLD].size, 0, 1, 0);
2939 if (ret)
2940 goto err_unlock;
2941 }
2942
2943 if (ntxq > 2) {
2944 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2945 SGE_CNTXT_CTRL, id,
2946 q->txq[TXQ_CTRL].phys_addr,
2947 q->txq[TXQ_CTRL].size,
2948 q->txq[TXQ_CTRL].token, 1, 0);
2949 if (ret)
2950 goto err_unlock;
2951 }
2952
b1186dee 2953 spin_unlock_irq(&adapter->sge.reg_lock);
4d22de3e 2954
bea3348e
SH
2955 q->adap = adapter;
2956 q->netdev = dev;
82ad3329 2957 q->tx_q = netdevq;
bea3348e 2958 t3_update_qset_coalesce(q, p);
b47385bd 2959
7385ecf3
DLR
2960 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
2961 GFP_KERNEL | __GFP_COMP);
b1fb1f28
DLR
2962 if (!avail) {
2963 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
2964 goto err;
2965 }
2966 if (avail < q->fl[0].size)
2967 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
2968 avail);
2969
7385ecf3
DLR
2970 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
2971 GFP_KERNEL | __GFP_COMP);
b1fb1f28
DLR
2972 if (avail < q->fl[1].size)
2973 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
2974 avail);
4d22de3e
DLR
2975 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2976
2977 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2978 V_NEWTIMER(q->rspq.holdoff_tmr));
2979
2980 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2981 return 0;
2982
b1fb1f28 2983err_unlock:
b1186dee 2984 spin_unlock_irq(&adapter->sge.reg_lock);
b1fb1f28 2985err:
4d22de3e
DLR
2986 t3_free_qset(adapter, q);
2987 return ret;
2988}
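#if 0	/* Illustrative only, not built: a hypothetical caller bringing up one
	 * queue set per port with the defaults prepared by t3_sge_prep().
	 * Real setup (MSI-X vector mapping, multiqueue, error unwind) lives
	 * in the main driver; SGE_TXQ_PER_SET and adap->params.sge.qset[] are
	 * assumed here.
	 */
	int i, err;

	for (i = 0; i < adap->params.nports; i++) {
		err = t3_sge_alloc_qset(adap, i, 1, i,
					&adap->params.sge.qset[i],
					SGE_TXQ_PER_SET, adap->port[i],
					netdev_get_tx_queue(adap->port[i], 0));
		if (err)
			break;
	}
#endif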
2989
0ca41c04
DLR
2990/**
2991 * t3_stop_sge_timers - stop SGE timer call backs
2992 * @adap: the adapter
2993 *
2994 * Stops each SGE queue set's timer call back
2995 */
2996void t3_stop_sge_timers(struct adapter *adap)
2997{
2998 int i;
2999
3000 for (i = 0; i < SGE_QSETS; ++i) {
3001 struct sge_qset *q = &adap->sge.qs[i];
3002
3003 if (q->tx_reclaim_timer.function)
3004 del_timer_sync(&q->tx_reclaim_timer);
3005 }
3006}
3007
4d22de3e
DLR
3008/**
3009 * t3_free_sge_resources - free SGE resources
3010 * @adap: the adapter
3011 *
3012 * Frees resources used by the SGE queue sets.
3013 */
3014void t3_free_sge_resources(struct adapter *adap)
3015{
3016 int i;
3017
3018 for (i = 0; i < SGE_QSETS; ++i)
3019 t3_free_qset(adap, &adap->sge.qs[i]);
3020}
3021
3022/**
3023 * t3_sge_start - enable SGE
3024 * @adap: the adapter
3025 *
3026 * Enables the SGE for DMAs. This is the last step in starting packet
3027 * transfers.
3028 */
3029void t3_sge_start(struct adapter *adap)
3030{
3031 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3032}
3033
3034/**
3035 * t3_sge_stop - disable SGE operation
3036 * @adap: the adapter
3037 *
 3038 * Disables the DMA engine. This can be called in emergencies (e.g.,
3039 * from error interrupts) or from normal process context. In the latter
3040 * case it also disables any pending queue restart tasklets. Note that
3041 * if it is called in interrupt context it cannot disable the restart
3042 * tasklets as it cannot wait, however the tasklets will have no effect
3043 * since the doorbells are disabled and the driver will call this again
3044 * later from process context, at which time the tasklets will be stopped
3045 * if they are still running.
3046 */
3047void t3_sge_stop(struct adapter *adap)
3048{
3049 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3050 if (!in_interrupt()) {
3051 int i;
3052
3053 for (i = 0; i < SGE_QSETS; ++i) {
3054 struct sge_qset *qs = &adap->sge.qs[i];
3055
3056 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3057 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3058 }
3059 }
3060}
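#if 0	/* Illustrative only, not built: the expected pairing of the SGE
	 * start/stop entry points over the life of the queue sets, as a
	 * rough ordering sketch.
	 */
	t3_sge_start(adap);		/* after all queue sets are set up */

	t3_sge_stop(adap);		/* on teardown or fatal error */
	t3_stop_sge_timers(adap);	/* then quiesce the reclaim timers */
	t3_free_sge_resources(adap);	/* finally release the rings */
#endif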
3061
3062/**
3063 * t3_sge_init - initialize SGE
3064 * @adap: the adapter
3065 * @p: the SGE parameters
3066 *
3067 * Performs SGE initialization needed every time after a chip reset.
3068 * We do not initialize any of the queue sets here, instead the driver
3069 * top-level must request those individually. We also do not enable DMA
3070 * here, that should be done after the queues have been set up.
3071 */
3072void t3_sge_init(struct adapter *adap, struct sge_params *p)
3073{
3074 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3075
3076 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
b881955b 3077 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
4d22de3e
DLR
3078 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3079 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3080#if SGE_NUM_GENBITS == 1
3081 ctrl |= F_EGRGENCTRL;
3082#endif
3083 if (adap->params.rev > 0) {
3084 if (!(adap->flags & (USING_MSIX | USING_MSI)))
3085 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
4d22de3e
DLR
3086 }
3087 t3_write_reg(adap, A_SG_CONTROL, ctrl);
3088 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3089 V_LORCQDRBTHRSH(512));
3090 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3091 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
6195c71d 3092 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
b881955b
DLR
3093 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3094 adap->params.rev < T3_REV_C ? 1000 : 500);
4d22de3e
DLR
3095 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3096 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3097 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3098 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3099 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3100}
3101
3102/**
3103 * t3_sge_prep - one-time SGE initialization
3104 * @adap: the associated adapter
3105 * @p: SGE parameters
3106 *
3107 * Performs one-time initialization of SGE SW state. Includes determining
3108 * defaults for the assorted SGE parameters, which admins can change until
3109 * they are used to initialize the SGE.
3110 */
7b9b0943 3111void t3_sge_prep(struct adapter *adap, struct sge_params *p)
4d22de3e
DLR
3112{
3113 int i;
3114
3115 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3116 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3117
3118 for (i = 0; i < SGE_QSETS; ++i) {
3119 struct qset_params *q = p->qset + i;
3120
3121 q->polling = adap->params.rev > 0;
3122 q->coalesce_usecs = 5;
3123 q->rspq_size = 1024;
e0994eb1 3124 q->fl_size = 1024;
7385ecf3 3125 q->jumbo_size = 512;
4d22de3e
DLR
3126 q->txq_size[TXQ_ETH] = 1024;
3127 q->txq_size[TXQ_OFLD] = 1024;
3128 q->txq_size[TXQ_CTRL] = 256;
3129 q->cong_thres = 0;
3130 }
3131
3132 spin_lock_init(&adap->sge.reg_lock);
3133}
3134
3135/**
3136 * t3_get_desc - dump an SGE descriptor for debugging purposes
3137 * @qs: the queue set
3138 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3139 * @idx: the descriptor index in the queue
3140 * @data: where to dump the descriptor contents
3141 *
3142 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3143 * size of the descriptor.
3144 */
3145int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3146 unsigned char *data)
3147{
3148 if (qnum >= 6)
3149 return -EINVAL;
3150
3151 if (qnum < 3) {
3152 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3153 return -EINVAL;
3154 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3155 return sizeof(struct tx_desc);
3156 }
3157
3158 if (qnum == 3) {
3159 if (!qs->rspq.desc || idx >= qs->rspq.size)
3160 return -EINVAL;
3161 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3162 return sizeof(struct rsp_desc);
3163 }
3164
3165 qnum -= 4;
3166 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3167 return -EINVAL;
3168 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3169 return sizeof(struct rx_desc);
3170}
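#if 0	/* Illustrative only, not built: dumping the first descriptor of the
	 * Ethernet Tx queue (queue number 0) of queue set 0 for debugging.
	 * The buffer only needs to cover the largest descriptor requested.
	 */
	unsigned char buf[sizeof(struct tx_desc)];
	int len = t3_get_desc(&adap->sge.qs[0], 0, 0, buf);
#endif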