drivers/net/cxgb3/sge.c
4d22de3e 1/*
1d68e93d 2 * Copyright (c) 2005-2007 Chelsio, Inc. All rights reserved.
4d22de3e 3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
4d22de3e 9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
4d22de3e 31 */
32#include <linux/skbuff.h>
33#include <linux/netdevice.h>
34#include <linux/etherdevice.h>
35#include <linux/if_vlan.h>
36#include <linux/ip.h>
37#include <linux/tcp.h>
38#include <linux/dma-mapping.h>
39#include "common.h"
40#include "regs.h"
41#include "sge_defs.h"
42#include "t3_cpl.h"
43#include "firmware_exports.h"
44
45#define USE_GTS 0
46
47#define SGE_RX_SM_BUF_SIZE 1536
e0994eb1 48
4d22de3e 49#define SGE_RX_COPY_THRES 256
cf992af5 50#define SGE_RX_PULL_LEN 128
4d22de3e 51
e0994eb1 52/*
53 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
54 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
55 * directly.
e0994eb1 56 */
57#define FL0_PG_CHUNK_SIZE 2048
58
e0994eb1 59#define SGE_RX_DROP_THRES 16
60
61/*
62 * Period of the Tx buffer reclaim timer. This timer does not need to run
63 * frequently as Tx buffers are usually reclaimed by new Tx packets.
64 */
65#define TX_RECLAIM_PERIOD (HZ / 4)
66
67/* WR size in bytes */
68#define WR_LEN (WR_FLITS * 8)
69
70/*
71 * Types of Tx queues in each queue set. Order here matters, do not change.
72 */
73enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
74
75/* Values for sge_txq.flags */
76enum {
77 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
78 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
79};
80
81struct tx_desc {
82 u64 flit[TX_DESC_FLITS];
83};
84
85struct rx_desc {
86 __be32 addr_lo;
87 __be32 len_gen;
88 __be32 gen2;
89 __be32 addr_hi;
90};
91
92struct tx_sw_desc { /* SW state per Tx descriptor */
93 struct sk_buff *skb;
94};
95
cf992af5 96struct rx_sw_desc { /* SW state per Rx descriptor */
97 union {
98 struct sk_buff *skb;
99 struct fl_pg_chunk pg_chunk;
100 };
101 DECLARE_PCI_UNMAP_ADDR(dma_addr);
102};
103
104struct rsp_desc { /* response queue descriptor */
105 struct rss_header rss_hdr;
106 __be32 flags;
107 __be32 len_cq;
108 u8 imm_data[47];
109 u8 intr_gen;
110};
111
112struct unmap_info { /* packet unmapping info, overlays skb->cb */
113 int sflit; /* start flit of first SGL entry in Tx descriptor */
114 u16 fragidx; /* first page fragment in current Tx descriptor */
115 u16 addr_idx; /* buffer index of first SGL entry in descriptor */
116 u32 len; /* mapped length of skb main body */
117};
118
119/*
120 * Holds unmapping information for Tx packets that need deferred unmapping.
121 * This structure lives at skb->head and must be allocated by callers.
122 */
123struct deferred_unmap_info {
124 struct pci_dev *pdev;
125 dma_addr_t addr[MAX_SKB_FRAGS + 1];
126};
127
128/*
129 * Maps a number of flits to the number of Tx descriptors that can hold them.
130 * The formula is
131 *
132 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
133 *
134 * HW allows up to 4 descriptors to be combined into a WR.
135 */
136static u8 flit_desc_map[] = {
137 0,
138#if SGE_NUM_GENBITS == 1
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
143#elif SGE_NUM_GENBITS == 2
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
147 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
148#else
149# error "SGE_NUM_GENBITS must be 1 or 2"
150#endif
151};
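/*
 * Example (illustrative, derived from the formula above): any WR of 2 to
 * WR_FLITS flits fits in a single descriptor, and each further block of
 * (WR_FLITS - 1) flits adds one more descriptor, up to the 4-descriptor
 * limit the HW allows per WR.
 */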
152
153static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
154{
155 return container_of(q, struct sge_qset, fl[qidx]);
156}
157
158static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
159{
160 return container_of(q, struct sge_qset, rspq);
161}
162
163static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
164{
165 return container_of(q, struct sge_qset, txq[qidx]);
166}
167
168/**
169 * refill_rspq - replenish an SGE response queue
170 * @adapter: the adapter
171 * @q: the response queue to replenish
172 * @credits: how many new responses to make available
173 *
174 * Replenishes a response queue by making the supplied number of responses
175 * available to HW.
176 */
177static inline void refill_rspq(struct adapter *adapter,
178 const struct sge_rspq *q, unsigned int credits)
179{
180 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
181 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
182}
183
184/**
185 * need_skb_unmap - does the platform need unmapping of sk_buffs?
186 *
 187 * Returns true if the platform needs sk_buff unmapping. The compiler
 188 * optimizes away unnecessary code if this returns true.
189 */
190static inline int need_skb_unmap(void)
191{
192 /*
 193 * This structure is used to tell if the platform needs buffer
194 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
195 */
196 struct dummy {
197 DECLARE_PCI_UNMAP_ADDR(addr);
198 };
199
200 return sizeof(struct dummy) != 0;
201}
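/*
 * Note: on platforms where DECLARE_PCI_UNMAP_ADDR() expands to nothing, the
 * dummy struct above has no members and (as a gcc extension) size 0, so
 * need_skb_unmap() returns false and the unmap paths it guards compile away.
 */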
202
203/**
204 * unmap_skb - unmap a packet main body and its page fragments
205 * @skb: the packet
206 * @q: the Tx queue containing Tx descriptors for the packet
207 * @cidx: index of Tx descriptor
208 * @pdev: the PCI device
209 *
210 * Unmap the main body of an sk_buff and its page fragments, if any.
211 * Because of the fairly complicated structure of our SGLs and the desire
212 * to conserve space for metadata, we keep the information necessary to
213 * unmap an sk_buff partly in the sk_buff itself (in its cb), and partly
214 * in the Tx descriptors (the physical addresses of the various data
215 * buffers). The send functions initialize the state in skb->cb so we
216 * can unmap the buffers held in the first Tx descriptor here, and we
217 * have enough information at this point to update the state for the next
218 * Tx descriptor.
219 */
220static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
221 unsigned int cidx, struct pci_dev *pdev)
222{
223 const struct sg_ent *sgp;
224 struct unmap_info *ui = (struct unmap_info *)skb->cb;
225 int nfrags, frag_idx, curflit, j = ui->addr_idx;
226
227 sgp = (struct sg_ent *)&q->desc[cidx].flit[ui->sflit];
228
229 if (ui->len) {
230 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]), ui->len,
231 PCI_DMA_TODEVICE);
232 ui->len = 0; /* so we know for next descriptor for this skb */
233 j = 1;
234 }
235
236 frag_idx = ui->fragidx;
237 curflit = ui->sflit + 1 + j;
238 nfrags = skb_shinfo(skb)->nr_frags;
239
240 while (frag_idx < nfrags && curflit < WR_FLITS) {
241 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
242 skb_shinfo(skb)->frags[frag_idx].size,
243 PCI_DMA_TODEVICE);
244 j ^= 1;
245 if (j == 0) {
246 sgp++;
247 curflit++;
248 }
249 curflit++;
250 frag_idx++;
251 }
252
253 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
254 ui->fragidx = frag_idx;
255 ui->addr_idx = j;
256 ui->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
257 }
258}
259
260/**
261 * free_tx_desc - reclaims Tx descriptors and their buffers
262 * @adapter: the adapter
263 * @q: the Tx queue to reclaim descriptors from
264 * @n: the number of descriptors to reclaim
265 *
266 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
267 * Tx buffers. Called with the Tx queue lock held.
268 */
269static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
270 unsigned int n)
271{
272 struct tx_sw_desc *d;
273 struct pci_dev *pdev = adapter->pdev;
274 unsigned int cidx = q->cidx;
275
276 const int need_unmap = need_skb_unmap() &&
277 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
278
279 d = &q->sdesc[cidx];
280 while (n--) {
281 if (d->skb) { /* an SGL is present */
99d7cf30 282 if (need_unmap)
283 unmap_skb(d->skb, q, cidx, pdev);
284 if (d->skb->priority == cidx)
285 kfree_skb(d->skb);
286 }
287 ++d;
288 if (++cidx == q->size) {
289 cidx = 0;
290 d = q->sdesc;
291 }
292 }
293 q->cidx = cidx;
294}
295
296/**
297 * reclaim_completed_tx - reclaims completed Tx descriptors
298 * @adapter: the adapter
299 * @q: the Tx queue to reclaim completed descriptors from
300 *
301 * Reclaims Tx descriptors that the SGE has indicated it has processed,
302 * and frees the associated buffers if possible. Called with the Tx
303 * queue's lock held.
304 */
305static inline void reclaim_completed_tx(struct adapter *adapter,
306 struct sge_txq *q)
307{
308 unsigned int reclaim = q->processed - q->cleaned;
309
310 if (reclaim) {
311 free_tx_desc(adapter, q, reclaim);
312 q->cleaned += reclaim;
313 q->in_use -= reclaim;
314 }
315}
316
317/**
318 * should_restart_tx - are there enough resources to restart a Tx queue?
319 * @q: the Tx queue
320 *
321 * Checks if there are enough descriptors to restart a suspended Tx queue.
322 */
323static inline int should_restart_tx(const struct sge_txq *q)
324{
325 unsigned int r = q->processed - q->cleaned;
326
327 return q->in_use - r < (q->size >> 1);
328}
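/*
 * In other words: counting descriptors whose reclaim is still pending as
 * free, the queue is considered restartable once fewer than half of its
 * descriptors are genuinely in use.
 */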
329
330/**
331 * free_rx_bufs - free the Rx buffers on an SGE free list
332 * @pdev: the PCI device associated with the adapter
333 * @rxq: the SGE free list to clean up
334 *
335 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
336 * this queue should be stopped before calling this function.
337 */
338static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
339{
340 unsigned int cidx = q->cidx;
341
342 while (q->credits--) {
343 struct rx_sw_desc *d = &q->sdesc[cidx];
344
345 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
346 q->buf_size, PCI_DMA_FROMDEVICE);
347 if (q->use_pages) {
348 put_page(d->pg_chunk.page);
349 d->pg_chunk.page = NULL;
e0994eb1 350 } else {
351 kfree_skb(d->skb);
352 d->skb = NULL;
e0994eb1 353 }
354 if (++cidx == q->size)
355 cidx = 0;
356 }
e0994eb1 357
358 if (q->pg_chunk.page) {
359 __free_page(q->pg_chunk.page);
360 q->pg_chunk.page = NULL;
361 }
362}
363
364/**
365 * add_one_rx_buf - add a packet buffer to a free-buffer list
cf992af5 366 * @va: buffer start VA
367 * @len: the buffer length
368 * @d: the HW Rx descriptor to write
369 * @sd: the SW Rx descriptor to write
370 * @gen: the generation bit value
371 * @pdev: the PCI device associated with the adapter
372 *
373 * Add a buffer of the given length to the supplied HW and SW Rx
374 * descriptors.
375 */
cf992af5 376static inline void add_one_rx_buf(void *va, unsigned int len,
377 struct rx_desc *d, struct rx_sw_desc *sd,
378 unsigned int gen, struct pci_dev *pdev)
379{
380 dma_addr_t mapping;
381
e0994eb1 382 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
383 pci_unmap_addr_set(sd, dma_addr, mapping);
384
385 d->addr_lo = cpu_to_be32(mapping);
386 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
387 wmb();
388 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
389 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
390}
391
392static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp)
393{
394 if (!q->pg_chunk.page) {
395 q->pg_chunk.page = alloc_page(gfp);
396 if (unlikely(!q->pg_chunk.page))
397 return -ENOMEM;
398 q->pg_chunk.va = page_address(q->pg_chunk.page);
399 q->pg_chunk.offset = 0;
400 }
401 sd->pg_chunk = q->pg_chunk;
402
403 q->pg_chunk.offset += q->buf_size;
404 if (q->pg_chunk.offset == PAGE_SIZE)
405 q->pg_chunk.page = NULL;
406 else {
407 q->pg_chunk.va += q->buf_size;
408 get_page(q->pg_chunk.page);
409 }
410 return 0;
411}
412
413/**
414 * refill_fl - refill an SGE free-buffer list
415 * @adapter: the adapter
416 * @q: the free-list to refill
417 * @n: the number of new buffers to allocate
418 * @gfp: the gfp flags for allocating new buffers
419 *
420 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
 421 * allocated with the supplied gfp flags. The caller must ensure that
422 * @n does not exceed the queue's capacity.
423 */
424static void refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
425{
cf992af5 426 void *buf_start;
427 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
428 struct rx_desc *d = &q->desc[q->pidx];
429
430 while (n--) {
431 if (q->use_pages) {
432 if (unlikely(alloc_pg_chunk(q, sd, gfp))) {
433nomem: q->alloc_failed++;
434 break;
435 }
cf992af5 436 buf_start = sd->pg_chunk.va;
e0994eb1 437 } else {
cf992af5 438 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
e0994eb1 439
440 if (!skb)
441 goto nomem;
e0994eb1 442
443 sd->skb = skb;
444 buf_start = skb->data;
445 }
446
447 add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
448 adap->pdev);
449 d++;
450 sd++;
451 if (++q->pidx == q->size) {
452 q->pidx = 0;
453 q->gen ^= 1;
454 sd = q->sdesc;
455 d = q->desc;
456 }
457 q->credits++;
458 }
459
460 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
461}
462
463static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
464{
465 refill_fl(adap, fl, min(16U, fl->size - fl->credits), GFP_ATOMIC);
466}
467
468/**
469 * recycle_rx_buf - recycle a receive buffer
470 * @adapter: the adapter
471 * @q: the SGE free list
472 * @idx: index of buffer to recycle
473 *
474 * Recycles the specified buffer on the given free list by adding it at
475 * the next available slot on the list.
476 */
477static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
478 unsigned int idx)
479{
480 struct rx_desc *from = &q->desc[idx];
481 struct rx_desc *to = &q->desc[q->pidx];
482
cf992af5 483 q->sdesc[q->pidx] = q->sdesc[idx];
484 to->addr_lo = from->addr_lo; /* already big endian */
485 to->addr_hi = from->addr_hi; /* likewise */
486 wmb();
487 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
488 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
489 q->credits++;
490
491 if (++q->pidx == q->size) {
492 q->pidx = 0;
493 q->gen ^= 1;
494 }
495 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
496}
497
498/**
499 * alloc_ring - allocate resources for an SGE descriptor ring
500 * @pdev: the PCI device
501 * @nelem: the number of descriptors
502 * @elem_size: the size of each descriptor
503 * @sw_size: the size of the SW state associated with each ring element
504 * @phys: the physical address of the allocated ring
505 * @metadata: address of the array holding the SW state for the ring
506 *
507 * Allocates resources for an SGE descriptor ring, such as Tx queues,
508 * free buffer lists, or response queues. Each SGE ring requires
509 * space for its HW descriptors plus, optionally, space for the SW state
510 * associated with each HW entry (the metadata). The function returns
511 * three values: the virtual address for the HW ring (the return value
512 * of the function), the physical address of the HW ring, and the address
513 * of the SW ring.
514 */
515static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
e0994eb1 516 size_t sw_size, dma_addr_t * phys, void *metadata)
517{
518 size_t len = nelem * elem_size;
519 void *s = NULL;
520 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
521
522 if (!p)
523 return NULL;
524 if (sw_size) {
525 s = kcalloc(nelem, sw_size, GFP_KERNEL);
526
527 if (!s) {
528 dma_free_coherent(&pdev->dev, len, p, *phys);
529 return NULL;
530 }
531 }
532 if (metadata)
533 *(void **)metadata = s;
534 memset(p, 0, len);
535 return p;
536}
537
538/**
539 * free_qset - free the resources of an SGE queue set
540 * @adapter: the adapter owning the queue set
541 * @q: the queue set
542 *
543 * Release the HW and SW resources associated with an SGE queue set, such
544 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
545 * queue set must be quiesced prior to calling this.
546 */
547void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
548{
549 int i;
550 struct pci_dev *pdev = adapter->pdev;
551
552 if (q->tx_reclaim_timer.function)
553 del_timer_sync(&q->tx_reclaim_timer);
554
555 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
556 if (q->fl[i].desc) {
557 spin_lock(&adapter->sge.reg_lock);
558 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
559 spin_unlock(&adapter->sge.reg_lock);
560 free_rx_bufs(pdev, &q->fl[i]);
561 kfree(q->fl[i].sdesc);
562 dma_free_coherent(&pdev->dev,
563 q->fl[i].size *
564 sizeof(struct rx_desc), q->fl[i].desc,
565 q->fl[i].phys_addr);
566 }
567
568 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
569 if (q->txq[i].desc) {
570 spin_lock(&adapter->sge.reg_lock);
571 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
572 spin_unlock(&adapter->sge.reg_lock);
573 if (q->txq[i].sdesc) {
574 free_tx_desc(adapter, &q->txq[i],
575 q->txq[i].in_use);
576 kfree(q->txq[i].sdesc);
577 }
578 dma_free_coherent(&pdev->dev,
579 q->txq[i].size *
580 sizeof(struct tx_desc),
581 q->txq[i].desc, q->txq[i].phys_addr);
582 __skb_queue_purge(&q->txq[i].sendq);
583 }
584
585 if (q->rspq.desc) {
586 spin_lock(&adapter->sge.reg_lock);
587 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
588 spin_unlock(&adapter->sge.reg_lock);
589 dma_free_coherent(&pdev->dev,
590 q->rspq.size * sizeof(struct rsp_desc),
591 q->rspq.desc, q->rspq.phys_addr);
592 }
593
594 if (q->netdev)
595 q->netdev->atalk_ptr = NULL;
596
597 memset(q, 0, sizeof(*q));
598}
599
600/**
601 * init_qset_cntxt - initialize an SGE queue set context info
602 * @qs: the queue set
603 * @id: the queue set id
604 *
605 * Initializes the TIDs and context ids for the queues of a queue set.
606 */
607static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
608{
609 qs->rspq.cntxt_id = id;
610 qs->fl[0].cntxt_id = 2 * id;
611 qs->fl[1].cntxt_id = 2 * id + 1;
612 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
613 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
614 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
615 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
616 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
617}
618
619/**
620 * sgl_len - calculates the size of an SGL of the given capacity
621 * @n: the number of SGL entries
622 *
623 * Calculates the number of flits needed for a scatter/gather list that
624 * can hold the given number of entries.
625 */
626static inline unsigned int sgl_len(unsigned int n)
627{
628 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
629 return (3 * n) / 2 + (n & 1);
630}
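/*
 * Example (illustrative, assuming the usual 3-flit sg_ent that packs two
 * 32-bit lengths and two 64-bit addresses): sgl_len(3) = (3 * 3) / 2 + 1 =
 * 5 flits, i.e. one full sg_ent plus a partially used one.
 */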
631
632/**
633 * flits_to_desc - returns the num of Tx descriptors for the given flits
634 * @n: the number of flits
635 *
636 * Calculates the number of Tx descriptors needed for the supplied number
637 * of flits.
638 */
639static inline unsigned int flits_to_desc(unsigned int n)
640{
641 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
642 return flit_desc_map[n];
643}
644
645/**
646 * get_packet - return the next ingress packet buffer from a free list
647 * @adap: the adapter that received the packet
648 * @fl: the SGE free list holding the packet
649 * @len: the packet length including any SGE padding
650 * @drop_thres: # of remaining buffers before we start dropping packets
651 *
652 * Get the next packet from a free list and complete setup of the
653 * sk_buff. If the packet is small we make a copy and recycle the
654 * original buffer, otherwise we use the original buffer itself. If a
655 * positive drop threshold is supplied packets are dropped and their
656 * buffers recycled if (a) the number of remaining buffers is under the
657 * threshold and the packet is too big to copy, or (b) the packet should
658 * be copied but there is no memory for the copy.
659 */
660static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
661 unsigned int len, unsigned int drop_thres)
662{
663 struct sk_buff *skb = NULL;
664 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
665
666 prefetch(sd->skb->data);
667 fl->credits--;
668
669 if (len <= SGE_RX_COPY_THRES) {
670 skb = alloc_skb(len, GFP_ATOMIC);
671 if (likely(skb != NULL)) {
672 __skb_put(skb, len);
673 pci_dma_sync_single_for_cpu(adap->pdev,
674 pci_unmap_addr(sd, dma_addr), len,
675 PCI_DMA_FROMDEVICE);
676 memcpy(skb->data, sd->skb->data, len);
677 pci_dma_sync_single_for_device(adap->pdev,
678 pci_unmap_addr(sd, dma_addr), len,
679 PCI_DMA_FROMDEVICE);
680 } else if (!drop_thres)
681 goto use_orig_buf;
682recycle:
683 recycle_rx_buf(adap, fl, fl->cidx);
684 return skb;
685 }
686
687 if (unlikely(fl->credits < drop_thres))
688 goto recycle;
689
690use_orig_buf:
691 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
692 fl->buf_size, PCI_DMA_FROMDEVICE);
693 skb = sd->skb;
694 skb_put(skb, len);
695 __refill_fl(adap, fl);
696 return skb;
697}
698
699/**
700 * get_packet_pg - return the next ingress packet buffer from a free list
701 * @adap: the adapter that received the packet
702 * @fl: the SGE free list holding the packet
703 * @len: the packet length including any SGE padding
704 * @drop_thres: # of remaining buffers before we start dropping packets
705 *
706 * Get the next packet from a free list populated with page chunks.
707 * If the packet is small we make a copy and recycle the original buffer,
708 * otherwise we attach the original buffer as a page fragment to a fresh
709 * sk_buff. If a positive drop threshold is supplied packets are dropped
710 * and their buffers recycled if (a) the number of remaining buffers is
711 * under the threshold and the packet is too big to copy, or (b) there's
712 * no system memory.
713 *
714 * Note: this function is similar to @get_packet but deals with Rx buffers
715 * that are page chunks rather than sk_buffs.
716 */
717static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
718 unsigned int len, unsigned int drop_thres)
719{
720 struct sk_buff *skb = NULL;
721 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
722
723 if (len <= SGE_RX_COPY_THRES) {
724 skb = alloc_skb(len, GFP_ATOMIC);
725 if (likely(skb != NULL)) {
726 __skb_put(skb, len);
727 pci_dma_sync_single_for_cpu(adap->pdev,
728 pci_unmap_addr(sd, dma_addr), len,
729 PCI_DMA_FROMDEVICE);
730 memcpy(skb->data, sd->pg_chunk.va, len);
731 pci_dma_sync_single_for_device(adap->pdev,
732 pci_unmap_addr(sd, dma_addr), len,
733 PCI_DMA_FROMDEVICE);
734 } else if (!drop_thres)
735 return NULL;
736recycle:
737 fl->credits--;
738 recycle_rx_buf(adap, fl, fl->cidx);
739 return skb;
740 }
741
742 if (unlikely(fl->credits <= drop_thres))
743 goto recycle;
744
745 skb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
746 if (unlikely(!skb)) {
747 if (!drop_thres)
748 return NULL;
749 goto recycle;
750 }
751
752 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
753 fl->buf_size, PCI_DMA_FROMDEVICE);
754 __skb_put(skb, SGE_RX_PULL_LEN);
755 memcpy(skb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
756 skb_fill_page_desc(skb, 0, sd->pg_chunk.page,
757 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
758 len - SGE_RX_PULL_LEN);
759 skb->len = len;
760 skb->data_len = len - SGE_RX_PULL_LEN;
761 skb->truesize += skb->data_len;
762
763 fl->credits--;
764 /*
765 * We do not refill FLs here, we let the caller do it to overlap a
766 * prefetch.
767 */
768 return skb;
769}
770
771/**
772 * get_imm_packet - return the next ingress packet buffer from a response
773 * @resp: the response descriptor containing the packet data
774 *
775 * Return a packet containing the immediate data of the given response.
776 */
777static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
778{
779 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
780
781 if (skb) {
782 __skb_put(skb, IMMED_PKT_SIZE);
27d7ff46 783 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
784 }
785 return skb;
786}
787
788/**
789 * calc_tx_descs - calculate the number of Tx descriptors for a packet
790 * @skb: the packet
791 *
792 * Returns the number of Tx descriptors needed for the given Ethernet
793 * packet. Ethernet packets require addition of WR and CPL headers.
794 */
795static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
796{
797 unsigned int flits;
798
799 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
800 return 1;
801
802 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
803 if (skb_shinfo(skb)->gso_size)
804 flits++;
805 return flits_to_desc(flits);
806}
807
808/**
809 * make_sgl - populate a scatter/gather list for a packet
810 * @skb: the packet
811 * @sgp: the SGL to populate
812 * @start: start address of skb main body data to include in the SGL
813 * @len: length of skb main body data to include in the SGL
814 * @pdev: the PCI device
815 *
816 * Generates a scatter/gather list for the buffers that make up a packet
817 * and returns the SGL size in 8-byte words. The caller must size the SGL
818 * appropriately.
819 */
820static inline unsigned int make_sgl(const struct sk_buff *skb,
821 struct sg_ent *sgp, unsigned char *start,
822 unsigned int len, struct pci_dev *pdev)
823{
824 dma_addr_t mapping;
825 unsigned int i, j = 0, nfrags;
826
827 if (len) {
828 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
829 sgp->len[0] = cpu_to_be32(len);
830 sgp->addr[0] = cpu_to_be64(mapping);
831 j = 1;
832 }
833
834 nfrags = skb_shinfo(skb)->nr_frags;
835 for (i = 0; i < nfrags; i++) {
836 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
837
838 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
839 frag->size, PCI_DMA_TODEVICE);
840 sgp->len[j] = cpu_to_be32(frag->size);
841 sgp->addr[j] = cpu_to_be64(mapping);
842 j ^= 1;
843 if (j == 0)
844 ++sgp;
845 }
846 if (j)
847 sgp->len[j] = 0;
848 return ((nfrags + (len != 0)) * 3) / 2 + j;
849}
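/*
 * Note: the value returned above is the total SGL size in flits, i.e.
 * sgl_len(nfrags + (len != 0)); j ends up as the parity of the number of
 * SGL entries written, which accounts for a trailing half-filled sg_ent.
 */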
850
851/**
852 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
853 * @adap: the adapter
854 * @q: the Tx queue
855 *
 856 * Ring the doorbell if a Tx queue is asleep. There is a natural race
 857 * where the HW goes to sleep just after we check; in that case the
 858 * interrupt handler will detect the outstanding TX packet and ring the
 859 * doorbell for us.
860 *
861 * When GTS is disabled we unconditionally ring the doorbell.
862 */
863static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
864{
865#if USE_GTS
866 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
867 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
868 set_bit(TXQ_LAST_PKT_DB, &q->flags);
869 t3_write_reg(adap, A_SG_KDOORBELL,
870 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
871 }
872#else
873 wmb(); /* write descriptors before telling HW */
874 t3_write_reg(adap, A_SG_KDOORBELL,
875 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
876#endif
877}
878
879static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
880{
881#if SGE_NUM_GENBITS == 2
882 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
883#endif
884}
885
886/**
887 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
888 * @ndesc: number of Tx descriptors spanned by the SGL
889 * @skb: the packet corresponding to the WR
890 * @d: first Tx descriptor to be written
891 * @pidx: index of above descriptors
892 * @q: the SGE Tx queue
893 * @sgl: the SGL
894 * @flits: number of flits to the start of the SGL in the first descriptor
895 * @sgl_flits: the SGL size in flits
896 * @gen: the Tx descriptor generation
897 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
898 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
899 *
900 * Write a work request header and an associated SGL. If the SGL is
901 * small enough to fit into one Tx descriptor it has already been written
902 * and we just need to write the WR header. Otherwise we distribute the
903 * SGL across the number of descriptors it spans.
904 */
905static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
906 struct tx_desc *d, unsigned int pidx,
907 const struct sge_txq *q,
908 const struct sg_ent *sgl,
909 unsigned int flits, unsigned int sgl_flits,
910 unsigned int gen, unsigned int wr_hi,
911 unsigned int wr_lo)
912{
913 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
914 struct tx_sw_desc *sd = &q->sdesc[pidx];
915
916 sd->skb = skb;
917 if (need_skb_unmap()) {
918 struct unmap_info *ui = (struct unmap_info *)skb->cb;
919
920 ui->fragidx = 0;
921 ui->addr_idx = 0;
922 ui->sflit = flits;
923 }
924
925 if (likely(ndesc == 1)) {
926 skb->priority = pidx;
927 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
928 V_WR_SGLSFLT(flits)) | wr_hi;
929 wmb();
930 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
931 V_WR_GEN(gen)) | wr_lo;
932 wr_gen2(d, gen);
933 } else {
934 unsigned int ogen = gen;
935 const u64 *fp = (const u64 *)sgl;
936 struct work_request_hdr *wp = wrp;
937
938 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
939 V_WR_SGLSFLT(flits)) | wr_hi;
940
941 while (sgl_flits) {
942 unsigned int avail = WR_FLITS - flits;
943
944 if (avail > sgl_flits)
945 avail = sgl_flits;
946 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
947 sgl_flits -= avail;
948 ndesc--;
949 if (!sgl_flits)
950 break;
951
952 fp += avail;
953 d++;
954 sd++;
955 if (++pidx == q->size) {
956 pidx = 0;
957 gen ^= 1;
958 d = q->desc;
959 sd = q->sdesc;
960 }
961
962 sd->skb = skb;
963 wrp = (struct work_request_hdr *)d;
964 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
965 V_WR_SGLSFLT(1)) | wr_hi;
966 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
967 sgl_flits + 1)) |
968 V_WR_GEN(gen)) | wr_lo;
969 wr_gen2(d, gen);
970 flits = 1;
971 }
972 skb->priority = pidx;
973 wrp->wr_hi |= htonl(F_WR_EOP);
974 wmb();
975 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
976 wr_gen2((struct tx_desc *)wp, ogen);
977 WARN_ON(ndesc != 0);
978 }
979}
980
981/**
982 * write_tx_pkt_wr - write a TX_PKT work request
983 * @adap: the adapter
984 * @skb: the packet to send
985 * @pi: the egress interface
986 * @pidx: index of the first Tx descriptor to write
987 * @gen: the generation value to use
988 * @q: the Tx queue
989 * @ndesc: number of descriptors the packet will occupy
990 * @compl: the value of the COMPL bit to use
991 *
992 * Generate a TX_PKT work request to send the supplied packet.
993 */
994static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
995 const struct port_info *pi,
996 unsigned int pidx, unsigned int gen,
997 struct sge_txq *q, unsigned int ndesc,
998 unsigned int compl)
999{
1000 unsigned int flits, sgl_flits, cntrl, tso_info;
1001 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1002 struct tx_desc *d = &q->desc[pidx];
1003 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1004
1005 cpl->len = htonl(skb->len | 0x80000000);
1006 cntrl = V_TXPKT_INTF(pi->port_id);
1007
1008 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1009 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1010
1011 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1012 if (tso_info) {
1013 int eth_type;
1014 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1015
1016 d->flit[2] = 0;
1017 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1018 hdr->cntrl = htonl(cntrl);
bbe735e4 1019 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1020 CPL_ETH_II : CPL_ETH_II_VLAN;
1021 tso_info |= V_LSO_ETH_TYPE(eth_type) |
eddc9ec5 1022 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
aa8223c7 1023 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1024 hdr->lso_info = htonl(tso_info);
1025 flits = 3;
1026 } else {
1027 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1028 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1029 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1030 cpl->cntrl = htonl(cntrl);
1031
1032 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1033 q->sdesc[pidx].skb = NULL;
1034 if (!skb->data_len)
1035 skb_copy_from_linear_data(skb, &d->flit[2],
1036 skb->len);
1037 else
1038 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1039
1040 flits = (skb->len + 7) / 8 + 2;
1041 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1042 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1043 | F_WR_SOP | F_WR_EOP | compl);
1044 wmb();
1045 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1046 V_WR_TID(q->token));
1047 wr_gen2(d, gen);
1048 kfree_skb(skb);
1049 return;
1050 }
1051
1052 flits = 2;
1053 }
1054
1055 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1056 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1057 if (need_skb_unmap())
1058 ((struct unmap_info *)skb->cb)->len = skb_headlen(skb);
1059
1060 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1061 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1062 htonl(V_WR_TID(q->token)));
1063}
1064
1065/**
 1066 * t3_eth_xmit - add a packet to the Ethernet Tx queue
1067 * @skb: the packet
1068 * @dev: the egress net device
1069 *
1070 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1071 */
1072int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1073{
1074 unsigned int ndesc, pidx, credits, gen, compl;
1075 const struct port_info *pi = netdev_priv(dev);
1076 struct adapter *adap = dev->priv;
1077 struct sge_qset *qs = dev2qset(dev);
1078 struct sge_txq *q = &qs->txq[TXQ_ETH];
1079
1080 /*
 1081 * The chip's minimum packet length is 9 octets, but we play it safe and reject
1082 * anything shorter than an Ethernet header.
1083 */
1084 if (unlikely(skb->len < ETH_HLEN)) {
1085 dev_kfree_skb(skb);
1086 return NETDEV_TX_OK;
1087 }
1088
1089 spin_lock(&q->lock);
1090 reclaim_completed_tx(adap, q);
1091
1092 credits = q->size - q->in_use;
1093 ndesc = calc_tx_descs(skb);
1094
1095 if (unlikely(credits < ndesc)) {
1096 if (!netif_queue_stopped(dev)) {
1097 netif_stop_queue(dev);
1098 set_bit(TXQ_ETH, &qs->txq_stopped);
1099 q->stops++;
1100 dev_err(&adap->pdev->dev,
1101 "%s: Tx ring %u full while queue awake!\n",
1102 dev->name, q->cntxt_id & 7);
1103 }
1104 spin_unlock(&q->lock);
1105 return NETDEV_TX_BUSY;
1106 }
1107
1108 q->in_use += ndesc;
1109 if (unlikely(credits - ndesc < q->stop_thres)) {
1110 q->stops++;
1111 netif_stop_queue(dev);
1112 set_bit(TXQ_ETH, &qs->txq_stopped);
1113#if !USE_GTS
1114 if (should_restart_tx(q) &&
1115 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1116 q->restarts++;
1117 netif_wake_queue(dev);
1118 }
1119#endif
1120 }
1121
1122 gen = q->gen;
1123 q->unacked += ndesc;
1124 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1125 q->unacked &= 7;
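	/*
	 * q->unacked accumulates descriptors since the last completion
	 * request; the COMPL bit is set only when that count reaches 8,
	 * throttling completion requests to roughly one per 8 descriptors.
	 */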
1126 pidx = q->pidx;
1127 q->pidx += ndesc;
1128 if (q->pidx >= q->size) {
1129 q->pidx -= q->size;
1130 q->gen ^= 1;
1131 }
1132
1133 /* update port statistics */
1134 if (skb->ip_summed == CHECKSUM_COMPLETE)
1135 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1136 if (skb_shinfo(skb)->gso_size)
1137 qs->port_stats[SGE_PSTAT_TSO]++;
1138 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1139 qs->port_stats[SGE_PSTAT_VLANINS]++;
1140
1141 dev->trans_start = jiffies;
1142 spin_unlock(&q->lock);
1143
1144 /*
1145 * We do not use Tx completion interrupts to free DMAd Tx packets.
 1146 * This is good for performance but means that we rely on new Tx
1147 * packets arriving to run the destructors of completed packets,
1148 * which open up space in their sockets' send queues. Sometimes
1149 * we do not get such new packets causing Tx to stall. A single
1150 * UDP transmitter is a good example of this situation. We have
1151 * a clean up timer that periodically reclaims completed packets
1152 * but it doesn't run often enough (nor do we want it to) to prevent
1153 * lengthy stalls. A solution to this problem is to run the
1154 * destructor early, after the packet is queued but before it's DMAd.
1155 * A cons is that we lie to socket memory accounting, but the amount
1156 * of extra memory is reasonable (limited by the number of Tx
1157 * descriptors), the packets do actually get freed quickly by new
1158 * packets almost always, and for protocols like TCP that wait for
1159 * acks to really free up the data the extra memory is even less.
1160 * On the positive side we run the destructors on the sending CPU
1161 * rather than on a potentially different completing CPU, usually a
1162 * good thing. We also run them without holding our Tx queue lock,
1163 * unlike what reclaim_completed_tx() would otherwise do.
1164 *
1165 * Run the destructor before telling the DMA engine about the packet
1166 * to make sure it doesn't complete and get freed prematurely.
1167 */
1168 if (likely(!skb_shared(skb)))
1169 skb_orphan(skb);
1170
1171 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1172 check_ring_tx_db(adap, q);
1173 return NETDEV_TX_OK;
1174}
1175
1176/**
1177 * write_imm - write a packet into a Tx descriptor as immediate data
1178 * @d: the Tx descriptor to write
1179 * @skb: the packet
1180 * @len: the length of packet data to write as immediate data
1181 * @gen: the generation bit value to write
1182 *
1183 * Writes a packet as immediate data into a Tx descriptor. The packet
1184 * contains a work request at its beginning. We must write the packet
 1185 * carefully so the SGE doesn't read it accidentally before it's written
 1186 * in its entirety.
1187 */
1188static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1189 unsigned int len, unsigned int gen)
1190{
1191 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1192 struct work_request_hdr *to = (struct work_request_hdr *)d;
1193
1194 memcpy(&to[1], &from[1], len - sizeof(*from));
1195 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1196 V_WR_BCNTLFLT(len & 7));
1197 wmb();
1198 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1199 V_WR_LEN((len + 7) / 8));
1200 wr_gen2(d, gen);
1201 kfree_skb(skb);
1202}
1203
1204/**
1205 * check_desc_avail - check descriptor availability on a send queue
1206 * @adap: the adapter
1207 * @q: the send queue
1208 * @skb: the packet needing the descriptors
1209 * @ndesc: the number of Tx descriptors needed
1210 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1211 *
1212 * Checks if the requested number of Tx descriptors is available on an
1213 * SGE send queue. If the queue is already suspended or not enough
1214 * descriptors are available the packet is queued for later transmission.
1215 * Must be called with the Tx queue locked.
1216 *
1217 * Returns 0 if enough descriptors are available, 1 if there aren't
1218 * enough descriptors and the packet has been queued, and 2 if the caller
1219 * needs to retry because there weren't enough descriptors at the
1220 * beginning of the call but some freed up in the mean time.
1221 */
1222static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1223 struct sk_buff *skb, unsigned int ndesc,
1224 unsigned int qid)
1225{
1226 if (unlikely(!skb_queue_empty(&q->sendq))) {
1227 addq_exit:__skb_queue_tail(&q->sendq, skb);
1228 return 1;
1229 }
1230 if (unlikely(q->size - q->in_use < ndesc)) {
1231 struct sge_qset *qs = txq_to_qset(q, qid);
1232
1233 set_bit(qid, &qs->txq_stopped);
1234 smp_mb__after_clear_bit();
1235
1236 if (should_restart_tx(q) &&
1237 test_and_clear_bit(qid, &qs->txq_stopped))
1238 return 2;
1239
1240 q->stops++;
1241 goto addq_exit;
1242 }
1243 return 0;
1244}
1245
1246/**
1247 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1248 * @q: the SGE control Tx queue
1249 *
1250 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1251 * that send only immediate data (presently just the control queues) and
1252 * thus do not have any sk_buffs to release.
1253 */
1254static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1255{
1256 unsigned int reclaim = q->processed - q->cleaned;
1257
1258 q->in_use -= reclaim;
1259 q->cleaned += reclaim;
1260}
1261
1262static inline int immediate(const struct sk_buff *skb)
1263{
1264 return skb->len <= WR_LEN && !skb->data_len;
1265}
1266
1267/**
1268 * ctrl_xmit - send a packet through an SGE control Tx queue
1269 * @adap: the adapter
1270 * @q: the control queue
1271 * @skb: the packet
1272 *
1273 * Send a packet through an SGE control Tx queue. Packets sent through
1274 * a control queue must fit entirely as immediate data in a single Tx
1275 * descriptor and have no page fragments.
1276 */
1277static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1278 struct sk_buff *skb)
1279{
1280 int ret;
1281 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1282
1283 if (unlikely(!immediate(skb))) {
1284 WARN_ON(1);
1285 dev_kfree_skb(skb);
1286 return NET_XMIT_SUCCESS;
1287 }
1288
1289 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1290 wrp->wr_lo = htonl(V_WR_TID(q->token));
1291
1292 spin_lock(&q->lock);
1293 again:reclaim_completed_tx_imm(q);
1294
1295 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1296 if (unlikely(ret)) {
1297 if (ret == 1) {
1298 spin_unlock(&q->lock);
1299 return NET_XMIT_CN;
1300 }
1301 goto again;
1302 }
1303
1304 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1305
1306 q->in_use++;
1307 if (++q->pidx >= q->size) {
1308 q->pidx = 0;
1309 q->gen ^= 1;
1310 }
1311 spin_unlock(&q->lock);
1312 wmb();
1313 t3_write_reg(adap, A_SG_KDOORBELL,
1314 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1315 return NET_XMIT_SUCCESS;
1316}
1317
1318/**
1319 * restart_ctrlq - restart a suspended control queue
 1320 * @qs: the queue set containing the control queue
1321 *
1322 * Resumes transmission on a suspended Tx control queue.
1323 */
1324static void restart_ctrlq(unsigned long data)
1325{
1326 struct sk_buff *skb;
1327 struct sge_qset *qs = (struct sge_qset *)data;
1328 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1329 struct adapter *adap = qs->netdev->priv;
1330
1331 spin_lock(&q->lock);
1332 again:reclaim_completed_tx_imm(q);
1333
1334 while (q->in_use < q->size && (skb = __skb_dequeue(&q->sendq)) != NULL) {
1335
1336 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1337
1338 if (++q->pidx >= q->size) {
1339 q->pidx = 0;
1340 q->gen ^= 1;
1341 }
1342 q->in_use++;
1343 }
1344
1345 if (!skb_queue_empty(&q->sendq)) {
1346 set_bit(TXQ_CTRL, &qs->txq_stopped);
1347 smp_mb__after_clear_bit();
1348
1349 if (should_restart_tx(q) &&
1350 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1351 goto again;
1352 q->stops++;
1353 }
1354
1355 spin_unlock(&q->lock);
1356 t3_write_reg(adap, A_SG_KDOORBELL,
1357 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1358}
1359
1360/*
1361 * Send a management message through control queue 0
1362 */
1363int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1364{
1365 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1366}
1367
1368/**
1369 * deferred_unmap_destructor - unmap a packet when it is freed
1370 * @skb: the packet
1371 *
1372 * This is the packet destructor used for Tx packets that need to remain
1373 * mapped until they are freed rather than until their Tx descriptors are
1374 * freed.
1375 */
1376static void deferred_unmap_destructor(struct sk_buff *skb)
1377{
1378 int i;
1379 const dma_addr_t *p;
1380 const struct skb_shared_info *si;
1381 const struct deferred_unmap_info *dui;
1382 const struct unmap_info *ui = (struct unmap_info *)skb->cb;
1383
1384 dui = (struct deferred_unmap_info *)skb->head;
1385 p = dui->addr;
1386
1387 if (ui->len)
1388 pci_unmap_single(dui->pdev, *p++, ui->len, PCI_DMA_TODEVICE);
1389
1390 si = skb_shinfo(skb);
1391 for (i = 0; i < si->nr_frags; i++)
1392 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1393 PCI_DMA_TODEVICE);
1394}
1395
1396static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1397 const struct sg_ent *sgl, int sgl_flits)
1398{
1399 dma_addr_t *p;
1400 struct deferred_unmap_info *dui;
1401
1402 dui = (struct deferred_unmap_info *)skb->head;
1403 dui->pdev = pdev;
1404 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1405 *p++ = be64_to_cpu(sgl->addr[0]);
1406 *p++ = be64_to_cpu(sgl->addr[1]);
1407 }
1408 if (sgl_flits)
1409 *p = be64_to_cpu(sgl->addr[0]);
1410}
1411
1412/**
1413 * write_ofld_wr - write an offload work request
1414 * @adap: the adapter
1415 * @skb: the packet to send
1416 * @q: the Tx queue
1417 * @pidx: index of the first Tx descriptor to write
1418 * @gen: the generation value to use
1419 * @ndesc: number of descriptors the packet will occupy
1420 *
1421 * Write an offload work request to send the supplied packet. The packet
1422 * data already carry the work request with most fields populated.
1423 */
1424static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1425 struct sge_txq *q, unsigned int pidx,
1426 unsigned int gen, unsigned int ndesc)
1427{
1428 unsigned int sgl_flits, flits;
1429 struct work_request_hdr *from;
1430 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1431 struct tx_desc *d = &q->desc[pidx];
1432
1433 if (immediate(skb)) {
1434 q->sdesc[pidx].skb = NULL;
1435 write_imm(d, skb, skb->len, gen);
1436 return;
1437 }
1438
1439 /* Only TX_DATA builds SGLs */
1440
1441 from = (struct work_request_hdr *)skb->data;
1442 memcpy(&d->flit[1], &from[1],
1443 skb_transport_offset(skb) - sizeof(*from));
4d22de3e 1444
ea2ae17d 1445 flits = skb_transport_offset(skb) / 8;
4d22de3e 1446 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
9c70220b 1447 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
27a884dc 1448 skb->tail - skb->transport_header,
4d22de3e 1449 adap->pdev);
1450 if (need_skb_unmap()) {
1451 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1452 skb->destructor = deferred_unmap_destructor;
9c70220b 1453 ((struct unmap_info *)skb->cb)->len = (skb->tail -
27a884dc 1454 skb->transport_header);
99d7cf30 1455 }
1456
1457 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1458 gen, from->wr_hi, from->wr_lo);
1459}
1460
1461/**
1462 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1463 * @skb: the packet
1464 *
1465 * Returns the number of Tx descriptors needed for the given offload
1466 * packet. These packets are already fully constructed.
1467 */
1468static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1469{
1470 unsigned int flits, cnt = skb_shinfo(skb)->nr_frags;
1471
1472 if (skb->len <= WR_LEN && cnt == 0)
1473 return 1; /* packet fits as immediate data */
1474
ea2ae17d 1475 flits = skb_transport_offset(skb) / 8; /* headers */
27a884dc 1476 if (skb->tail != skb->transport_header)
1477 cnt++;
1478 return flits_to_desc(flits + sgl_len(cnt));
1479}
1480
1481/**
1482 * ofld_xmit - send a packet through an offload queue
1483 * @adap: the adapter
1484 * @q: the Tx offload queue
1485 * @skb: the packet
1486 *
1487 * Send an offload packet through an SGE offload queue.
1488 */
1489static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1490 struct sk_buff *skb)
1491{
1492 int ret;
1493 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1494
1495 spin_lock(&q->lock);
1496 again:reclaim_completed_tx(adap, q);
1497
1498 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1499 if (unlikely(ret)) {
1500 if (ret == 1) {
1501 skb->priority = ndesc; /* save for restart */
1502 spin_unlock(&q->lock);
1503 return NET_XMIT_CN;
1504 }
1505 goto again;
1506 }
1507
1508 gen = q->gen;
1509 q->in_use += ndesc;
1510 pidx = q->pidx;
1511 q->pidx += ndesc;
1512 if (q->pidx >= q->size) {
1513 q->pidx -= q->size;
1514 q->gen ^= 1;
1515 }
1516 spin_unlock(&q->lock);
1517
1518 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1519 check_ring_tx_db(adap, q);
1520 return NET_XMIT_SUCCESS;
1521}
1522
1523/**
1524 * restart_offloadq - restart a suspended offload queue
 1525 * @qs: the queue set containing the offload queue
1526 *
1527 * Resumes transmission on a suspended Tx offload queue.
1528 */
1529static void restart_offloadq(unsigned long data)
1530{
1531 struct sk_buff *skb;
1532 struct sge_qset *qs = (struct sge_qset *)data;
1533 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1534 struct adapter *adap = qs->netdev->priv;
1535
1536 spin_lock(&q->lock);
1537 again:reclaim_completed_tx(adap, q);
1538
1539 while ((skb = skb_peek(&q->sendq)) != NULL) {
1540 unsigned int gen, pidx;
1541 unsigned int ndesc = skb->priority;
1542
1543 if (unlikely(q->size - q->in_use < ndesc)) {
1544 set_bit(TXQ_OFLD, &qs->txq_stopped);
1545 smp_mb__after_clear_bit();
1546
1547 if (should_restart_tx(q) &&
1548 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1549 goto again;
1550 q->stops++;
1551 break;
1552 }
1553
1554 gen = q->gen;
1555 q->in_use += ndesc;
1556 pidx = q->pidx;
1557 q->pidx += ndesc;
1558 if (q->pidx >= q->size) {
1559 q->pidx -= q->size;
1560 q->gen ^= 1;
1561 }
1562 __skb_unlink(skb, &q->sendq);
1563 spin_unlock(&q->lock);
1564
1565 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1566 spin_lock(&q->lock);
1567 }
1568 spin_unlock(&q->lock);
1569
1570#if USE_GTS
1571 set_bit(TXQ_RUNNING, &q->flags);
1572 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1573#endif
1574 t3_write_reg(adap, A_SG_KDOORBELL,
1575 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1576}
1577
1578/**
1579 * queue_set - return the queue set a packet should use
1580 * @skb: the packet
1581 *
1582 * Maps a packet to the SGE queue set it should use. The desired queue
1583 * set is carried in bits 1-3 in the packet's priority.
1584 */
1585static inline int queue_set(const struct sk_buff *skb)
1586{
1587 return skb->priority >> 1;
1588}
1589
1590/**
1591 * is_ctrl_pkt - return whether an offload packet is a control packet
1592 * @skb: the packet
1593 *
1594 * Determines whether an offload packet should use an OFLD or a CTRL
1595 * Tx queue. This is indicated by bit 0 in the packet's priority.
1596 */
1597static inline int is_ctrl_pkt(const struct sk_buff *skb)
1598{
1599 return skb->priority & 1;
1600}
1601
1602/**
1603 * t3_offload_tx - send an offload packet
1604 * @tdev: the offload device to send to
1605 * @skb: the packet
1606 *
1607 * Sends an offload packet. We use the packet priority to select the
1608 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1609 * should be sent as regular or control, bits 1-3 select the queue set.
1610 */
1611int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1612{
1613 struct adapter *adap = tdev2adap(tdev);
1614 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1615
1616 if (unlikely(is_ctrl_pkt(skb)))
1617 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1618
1619 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1620}
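/*
 * Usage sketch (illustrative, hypothetical variable names): a caller encodes
 * its choice in the packet priority before handing it to t3_offload_tx(),
 * e.g.
 *
 *	skb->priority = (qset_idx << 1) | use_ctrl_queue;
 *
 * which matches the decoding done by queue_set() and is_ctrl_pkt() above.
 */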
1621
1622/**
1623 * offload_enqueue - add an offload packet to an SGE offload receive queue
1624 * @q: the SGE response queue
1625 * @skb: the packet
1626 *
1627 * Add a new offload packet to an SGE response queue's offload packet
1628 * queue. If the packet is the first on the queue it schedules the RX
1629 * softirq to process the queue.
1630 */
1631static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1632{
1633 skb->next = skb->prev = NULL;
1634 if (q->rx_tail)
1635 q->rx_tail->next = skb;
1636 else {
1637 struct sge_qset *qs = rspq_to_qset(q);
1638
1639 if (__netif_rx_schedule_prep(qs->netdev))
1640 __netif_rx_schedule(qs->netdev);
1641 q->rx_head = skb;
1642 }
1643 q->rx_tail = skb;
1644}
1645
1646/**
1647 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1648 * @tdev: the offload device that will be receiving the packets
1649 * @q: the SGE response queue that assembled the bundle
1650 * @skbs: the partial bundle
1651 * @n: the number of packets in the bundle
1652 *
1653 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1654 */
1655static inline void deliver_partial_bundle(struct t3cdev *tdev,
1656 struct sge_rspq *q,
1657 struct sk_buff *skbs[], int n)
1658{
1659 if (n) {
1660 q->offload_bundles++;
1661 tdev->recv(tdev, skbs, n);
1662 }
1663}
1664
1665/**
1666 * ofld_poll - NAPI handler for offload packets in interrupt mode
1667 * @dev: the network device doing the polling
1668 * @budget: polling budget
1669 *
1670 * The NAPI handler for offload packets when a response queue is serviced
1671 * by the hard interrupt handler, i.e., when it's operating in non-polling
1672 * mode. Creates small packet batches and sends them through the offload
1673 * receive handler. Batches need to be of modest size as we do prefetches
1674 * on the packets in each.
1675 */
1676static int ofld_poll(struct net_device *dev, int *budget)
1677{
1678 struct adapter *adapter = dev->priv;
1679 struct sge_qset *qs = dev2qset(dev);
1680 struct sge_rspq *q = &qs->rspq;
1681 int work_done, limit = min(*budget, dev->quota), avail = limit;
1682
1683 while (avail) {
1684 struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
1685 int ngathered;
1686
1687 spin_lock_irq(&q->lock);
1688 head = q->rx_head;
1689 if (!head) {
1690 work_done = limit - avail;
1691 *budget -= work_done;
1692 dev->quota -= work_done;
1693 __netif_rx_complete(dev);
1694 spin_unlock_irq(&q->lock);
1695 return 0;
1696 }
1697
1698 tail = q->rx_tail;
1699 q->rx_head = q->rx_tail = NULL;
1700 spin_unlock_irq(&q->lock);
1701
1702 for (ngathered = 0; avail && head; avail--) {
1703 prefetch(head->data);
1704 skbs[ngathered] = head;
1705 head = head->next;
1706 skbs[ngathered]->next = NULL;
1707 if (++ngathered == RX_BUNDLE_SIZE) {
1708 q->offload_bundles++;
1709 adapter->tdev.recv(&adapter->tdev, skbs,
1710 ngathered);
1711 ngathered = 0;
1712 }
1713 }
1714 if (head) { /* splice remaining packets back onto Rx queue */
1715 spin_lock_irq(&q->lock);
1716 tail->next = q->rx_head;
1717 if (!q->rx_head)
1718 q->rx_tail = tail;
1719 q->rx_head = head;
1720 spin_unlock_irq(&q->lock);
1721 }
1722 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1723 }
1724 work_done = limit - avail;
1725 *budget -= work_done;
1726 dev->quota -= work_done;
1727 return 1;
1728}
1729
1730/**
1731 * rx_offload - process a received offload packet
1732 * @tdev: the offload device receiving the packet
1733 * @rq: the response queue that received the packet
1734 * @skb: the packet
1735 * @rx_gather: a gather list of packets if we are building a bundle
1736 * @gather_idx: index of the next available slot in the bundle
1737 *
 1738 * Process an ingress offload packet and add it to the offload ingress
1739 * queue. Returns the index of the next available slot in the bundle.
1740 */
1741static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1742 struct sk_buff *skb, struct sk_buff *rx_gather[],
1743 unsigned int gather_idx)
1744{
1745 rq->offload_pkts++;
459a98ed 1746 skb_reset_mac_header(skb);
c1d2bbe1 1747 skb_reset_network_header(skb);
badff6d0 1748 skb_reset_transport_header(skb);
1749
1750 if (rq->polling) {
1751 rx_gather[gather_idx++] = skb;
1752 if (gather_idx == RX_BUNDLE_SIZE) {
1753 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1754 gather_idx = 0;
1755 rq->offload_bundles++;
1756 }
1757 } else
1758 offload_enqueue(rq, skb);
1759
1760 return gather_idx;
1761}
1762
1763/**
1764 * restart_tx - check whether to restart suspended Tx queues
1765 * @qs: the queue set to resume
1766 *
1767 * Restarts suspended Tx queues of an SGE queue set if they have enough
1768 * free resources to resume operation.
1769 */
1770static void restart_tx(struct sge_qset *qs)
1771{
1772 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1773 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1774 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1775 qs->txq[TXQ_ETH].restarts++;
1776 if (netif_running(qs->netdev))
1777 netif_wake_queue(qs->netdev);
1778 }
1779
1780 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1781 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1782 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1783 qs->txq[TXQ_OFLD].restarts++;
1784 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1785 }
1786 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1787 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1788 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1789 qs->txq[TXQ_CTRL].restarts++;
1790 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1791 }
1792}
1793
1794/**
1795 * rx_eth - process an ingress ethernet packet
1796 * @adap: the adapter
1797 * @rq: the response queue that received the packet
1798 * @skb: the packet
1799 * @pad: amount of padding at the start of the buffer
1800 *
 1801 * Process an ingress ethernet packet and deliver it to the stack.
1802 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1803 * if it was immediate data in a response.
1804 */
1805static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1806 struct sk_buff *skb, int pad)
1807{
1808 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1809 struct port_info *pi;
1810
4d22de3e 1811 skb_pull(skb, sizeof(*p) + pad);
4c13eb66 1812 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
e360b562 1813 skb->dev->last_rx = jiffies;
4d22de3e
DLR
1814 pi = netdev_priv(skb->dev);
1815 if (pi->rx_csum_offload && p->csum_valid && p->csum == 0xffff &&
1816 !p->fragment) {
1817 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1818 skb->ip_summed = CHECKSUM_UNNECESSARY;
1819 } else
1820 skb->ip_summed = CHECKSUM_NONE;
1821
1822 if (unlikely(p->vlan_valid)) {
1823 struct vlan_group *grp = pi->vlan_grp;
1824
1825 rspq_to_qset(rq)->port_stats[SGE_PSTAT_VLANEX]++;
1826 if (likely(grp))
1827 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1828 rq->polling);
1829 else
1830 dev_kfree_skb_any(skb);
1831 } else if (rq->polling)
1832 netif_receive_skb(skb);
1833 else
1834 netif_rx(skb);
1835}
1836
1837/**
1838 * handle_rsp_cntrl_info - handles control information in a response
1839 * @qs: the queue set corresponding to the response
1840 * @flags: the response control flags
4d22de3e
DLR
1841 *
1842 * Handles the control information of an SGE response, such as GTS
1843 * indications and completion credits for the queue set's Tx queues.
6195c71d 1844 * HW coalesces credits; we don't do any extra SW coalescing.
4d22de3e 1845 */
6195c71d 1846static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
4d22de3e
DLR
1847{
1848 unsigned int credits;
1849
1850#if USE_GTS
1851 if (flags & F_RSPD_TXQ0_GTS)
1852 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
1853#endif
1854
4d22de3e
DLR
1855 credits = G_RSPD_TXQ0_CR(flags);
1856 if (credits)
1857 qs->txq[TXQ_ETH].processed += credits;
1858
6195c71d
DLR
1859 credits = G_RSPD_TXQ2_CR(flags);
1860 if (credits)
1861 qs->txq[TXQ_CTRL].processed += credits;
1862
4d22de3e
DLR
1863# if USE_GTS
1864 if (flags & F_RSPD_TXQ1_GTS)
1865 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
1866# endif
6195c71d
DLR
1867 credits = G_RSPD_TXQ1_CR(flags);
1868 if (credits)
1869 qs->txq[TXQ_OFLD].processed += credits;
4d22de3e
DLR
1870}
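
/*
 * Illustrative sketch: the field-macro convention behind the F_*, V_*,
 * G_* and M_* names used above (the real definitions are generated in
 * regs.h and sge_defs.h).  EX_FIELD is a hypothetical 6-bit field that
 * starts at bit 8; single-bit fields additionally get an F_FOO flag
 * equal to V_FOO(1U).
 */
#define S_EX_FIELD	8				/* bit offset of the field */
#define M_EX_FIELD	0x3f				/* right-justified mask */
#define V_EX_FIELD(x)	((x) << S_EX_FIELD)		/* place a value in the field */
#define G_EX_FIELD(w)	(((w) >> S_EX_FIELD) & M_EX_FIELD) /* extract the field */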
1871
1872/**
1873 * check_ring_db - check if we need to ring any doorbells
1874 * @adapter: the adapter
1875 * @qs: the queue set whose Tx queues are to be examined
1876 * @sleeping: indicates which Tx queue sent GTS
1877 *
1878 * Checks if some of a queue set's Tx queues need to ring their doorbells
1879 * to resume transmission after idling while they still have unprocessed
1880 * descriptors.
1881 */
1882static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
1883 unsigned int sleeping)
1884{
1885 if (sleeping & F_RSPD_TXQ0_GTS) {
1886 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1887
1888 if (txq->cleaned + txq->in_use != txq->processed &&
1889 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1890 set_bit(TXQ_RUNNING, &txq->flags);
1891 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1892 V_EGRCNTX(txq->cntxt_id));
1893 }
1894 }
1895
1896 if (sleeping & F_RSPD_TXQ1_GTS) {
1897 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
1898
1899 if (txq->cleaned + txq->in_use != txq->processed &&
1900 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1901 set_bit(TXQ_RUNNING, &txq->flags);
1902 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1903 V_EGRCNTX(txq->cntxt_id));
1904 }
1905 }
1906}
1907
1908/**
1909 * is_new_response - check if a response is newly written
1910 * @r: the response descriptor
1911 * @q: the response queue
1912 *
 1913 * Returns true if a response descriptor contains a response that has
 1914 * not yet been processed.
1915 */
1916static inline int is_new_response(const struct rsp_desc *r,
1917 const struct sge_rspq *q)
1918{
1919 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1920}
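
/*
 * Illustrative sketch (hypothetical names throughout): the
 * generation-bit scheme behind is_new_response().  HW stamps each entry
 * it writes with the current generation; SW consumes entries while the
 * stamped generation matches the one it expects and flips its
 * expectation whenever the consumer index wraps, so stale entries from
 * the previous pass around the ring are never mistaken for new ones.
 */
struct ex_ring {
	u32 *flags;		/* bit 0 of each entry holds the generation */
	unsigned int cidx;	/* consumer index */
	unsigned int size;	/* number of ring entries */
	unsigned int gen;	/* expected generation, starts at 1 */
};

static void ex_consume(struct ex_ring *r)
{
	while ((r->flags[r->cidx] & 1) == r->gen) {
		/* ... process entry r->cidx ... */
		if (++r->cidx == r->size) {
			r->cidx = 0;
			r->gen ^= 1;	/* expectation flips on wrap */
		}
	}
}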
1921
1922#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1923#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1924 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1925 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1926 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1927
1928/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1929#define NOMEM_INTR_DELAY 2500
1930
1931/**
1932 * process_responses - process responses from an SGE response queue
1933 * @adap: the adapter
1934 * @qs: the queue set to which the response queue belongs
1935 * @budget: how many responses can be processed in this round
1936 *
1937 * Process responses from an SGE response queue up to the supplied budget.
1938 * Responses include received packets as well as credits and other events
1939 * for the queues that belong to the response queue's queue set.
1940 * A negative budget is effectively unlimited.
1941 *
1942 * Additionally choose the interrupt holdoff time for the next interrupt
 1943 * on this queue. If the system is under memory shortage, use a fairly
1944 * long delay to help recovery.
1945 */
1946static int process_responses(struct adapter *adap, struct sge_qset *qs,
1947 int budget)
1948{
1949 struct sge_rspq *q = &qs->rspq;
1950 struct rsp_desc *r = &q->desc[q->cidx];
1951 int budget_left = budget;
6195c71d 1952 unsigned int sleeping = 0;
4d22de3e
DLR
1953 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
1954 int ngathered = 0;
1955
1956 q->next_holdoff = q->holdoff_tmr;
1957
1958 while (likely(budget_left && is_new_response(r, q))) {
e0994eb1 1959 int eth, ethpad = 2;
4d22de3e
DLR
1960 struct sk_buff *skb = NULL;
1961 u32 len, flags = ntohl(r->flags);
1962 u32 rss_hi = *(const u32 *)r, rss_lo = r->rss_hdr.rss_hash_val;
1963
1964 eth = r->rss_hdr.opcode == CPL_RX_PKT;
1965
1966 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
1967 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
1968 if (!skb)
1969 goto no_mem;
1970
1971 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
1972 skb->data[0] = CPL_ASYNC_NOTIF;
1973 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
1974 q->async_notif++;
1975 } else if (flags & F_RSPD_IMM_DATA_VALID) {
1976 skb = get_imm_packet(r);
1977 if (unlikely(!skb)) {
cf992af5 1978no_mem:
4d22de3e
DLR
1979 q->next_holdoff = NOMEM_INTR_DELAY;
1980 q->nomem++;
1981 /* consume one credit since we tried */
1982 budget_left--;
1983 break;
1984 }
1985 q->imm_data++;
e0994eb1 1986 ethpad = 0;
4d22de3e 1987 } else if ((len = ntohl(r->len_cq)) != 0) {
cf992af5 1988 struct sge_fl *fl;
e0994eb1 1989
cf992af5
DLR
1990 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
1991 if (fl->use_pages) {
1992 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
e0994eb1 1993
cf992af5
DLR
1994 prefetch(addr);
1995#if L1_CACHE_BYTES < 128
1996 prefetch(addr + L1_CACHE_BYTES);
1997#endif
e0994eb1
DLR
1998 __refill_fl(adap, fl);
1999
cf992af5
DLR
2000 skb = get_packet_pg(adap, fl, G_RSPD_LEN(len),
2001 eth ? SGE_RX_DROP_THRES : 0);
2002 } else
e0994eb1
DLR
2003 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2004 eth ? SGE_RX_DROP_THRES : 0);
cf992af5
DLR
2005 if (unlikely(!skb)) {
2006 if (!eth)
2007 goto no_mem;
2008 q->rx_drops++;
2009 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2010 __skb_pull(skb, 2);
4d22de3e 2011
4d22de3e
DLR
2012 if (++fl->cidx == fl->size)
2013 fl->cidx = 0;
2014 } else
2015 q->pure_rsps++;
2016
2017 if (flags & RSPD_CTRL_MASK) {
2018 sleeping |= flags & RSPD_GTS_MASK;
6195c71d 2019 handle_rsp_cntrl_info(qs, flags);
4d22de3e
DLR
2020 }
2021
2022 r++;
2023 if (unlikely(++q->cidx == q->size)) {
2024 q->cidx = 0;
2025 q->gen ^= 1;
2026 r = q->desc;
2027 }
2028 prefetch(r);
2029
2030 if (++q->credits >= (q->size / 4)) {
2031 refill_rspq(adap, q, q->credits);
2032 q->credits = 0;
2033 }
2034
cf992af5 2035 if (likely(skb != NULL)) {
4d22de3e
DLR
2036 if (eth)
2037 rx_eth(adap, q, skb, ethpad);
2038 else {
cf992af5
DLR
2039 /* Preserve the RSS info in csum & priority */
2040 skb->csum = rss_hi;
2041 skb->priority = rss_lo;
2042 ngathered = rx_offload(&adap->tdev, q, skb,
2043 offload_skbs,
e0994eb1 2044 ngathered);
4d22de3e
DLR
2045 }
2046 }
4d22de3e
DLR
2047 --budget_left;
2048 }
2049
4d22de3e
DLR
2050 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2051 if (sleeping)
2052 check_ring_db(adap, qs, sleeping);
2053
2054 smp_mb(); /* commit Tx queue .processed updates */
2055 if (unlikely(qs->txq_stopped != 0))
2056 restart_tx(qs);
2057
2058 budget -= budget_left;
2059 return budget;
2060}
2061
2062static inline int is_pure_response(const struct rsp_desc *r)
2063{
2064 u32 n = ntohl(r->flags) & (F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2065
2066 return (n | r->len_cq) == 0;
2067}
2068
2069/**
2070 * napi_rx_handler - the NAPI handler for Rx processing
2071 * @dev: the net device
2072 * @budget: how many packets we can process in this round
2073 *
2074 * Handler for new data events when using NAPI.
2075 */
2076static int napi_rx_handler(struct net_device *dev, int *budget)
2077{
2078 struct adapter *adap = dev->priv;
2079 struct sge_qset *qs = dev2qset(dev);
2080 int effective_budget = min(*budget, dev->quota);
2081
2082 int work_done = process_responses(adap, qs, effective_budget);
2083 *budget -= work_done;
2084 dev->quota -= work_done;
2085
2086 if (work_done >= effective_budget)
2087 return 1;
2088
2089 netif_rx_complete(dev);
2090
2091 /*
2092 * Because we don't atomically flush the following write it is
2093 * possible that in very rare cases it can reach the device in a way
2094 * that races with a new response being written plus an error interrupt
2095 * causing the NAPI interrupt handler below to return unhandled status
2096 * to the OS. To protect against this would require flushing the write
2097 * and doing both the write and the flush with interrupts off. Way too
2098 * expensive and unjustifiable given the rarity of the race.
2099 *
2100 * The race cannot happen at all with MSI-X.
2101 */
2102 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2103 V_NEWTIMER(qs->rspq.next_holdoff) |
2104 V_NEWINDEX(qs->rspq.cidx));
2105 return 0;
2106}
2107
2108/*
2109 * Returns true if the device is already scheduled for polling.
2110 */
2111static inline int napi_is_scheduled(struct net_device *dev)
2112{
2113 return test_bit(__LINK_STATE_RX_SCHED, &dev->state);
2114}
2115
2116/**
2117 * process_pure_responses - process pure responses from a response queue
2118 * @adap: the adapter
2119 * @qs: the queue set owning the response queue
2120 * @r: the first pure response to process
2121 *
2122 * A simpler version of process_responses() that handles only pure (i.e.,
 2123 * non data-carrying) responses. Such responses are too lightweight to
2124 * justify calling a softirq under NAPI, so we handle them specially in
2125 * the interrupt handler. The function is called with a pointer to a
2126 * response, which the caller must ensure is a valid pure response.
2127 *
2128 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2129 */
2130static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2131 struct rsp_desc *r)
2132{
2133 struct sge_rspq *q = &qs->rspq;
6195c71d 2134 unsigned int sleeping = 0;
4d22de3e
DLR
2135
2136 do {
2137 u32 flags = ntohl(r->flags);
2138
2139 r++;
2140 if (unlikely(++q->cidx == q->size)) {
2141 q->cidx = 0;
2142 q->gen ^= 1;
2143 r = q->desc;
2144 }
2145 prefetch(r);
2146
2147 if (flags & RSPD_CTRL_MASK) {
2148 sleeping |= flags & RSPD_GTS_MASK;
6195c71d 2149 handle_rsp_cntrl_info(qs, flags);
4d22de3e
DLR
2150 }
2151
2152 q->pure_rsps++;
2153 if (++q->credits >= (q->size / 4)) {
2154 refill_rspq(adap, q, q->credits);
2155 q->credits = 0;
2156 }
2157 } while (is_new_response(r, q) && is_pure_response(r));
2158
4d22de3e
DLR
2159 if (sleeping)
2160 check_ring_db(adap, qs, sleeping);
2161
2162 smp_mb(); /* commit Tx queue .processed updates */
2163 if (unlikely(qs->txq_stopped != 0))
2164 restart_tx(qs);
2165
2166 return is_new_response(r, q);
2167}
2168
2169/**
2170 * handle_responses - decide what to do with new responses in NAPI mode
2171 * @adap: the adapter
2172 * @q: the response queue
2173 *
2174 * This is used by the NAPI interrupt handlers to decide what to do with
2175 * new SGE responses. If there are no new responses it returns -1. If
2176 * there are new responses and they are pure (i.e., non-data carrying)
2177 * it handles them straight in hard interrupt context as they are very
2178 * cheap and don't deliver any packets. Finally, if there are any data
2179 * signaling responses it schedules the NAPI handler. Returns 1 if it
2180 * schedules NAPI, 0 if all new responses were pure.
2181 *
2182 * The caller must ascertain NAPI is not already running.
2183 */
2184static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2185{
2186 struct sge_qset *qs = rspq_to_qset(q);
2187 struct rsp_desc *r = &q->desc[q->cidx];
2188
2189 if (!is_new_response(r, q))
2190 return -1;
2191 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2192 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2193 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2194 return 0;
2195 }
2196 if (likely(__netif_rx_schedule_prep(qs->netdev)))
2197 __netif_rx_schedule(qs->netdev);
2198 return 1;
2199}
2200
2201/*
2202 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2203 * (i.e., response queue serviced in hard interrupt).
2204 */
2205irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2206{
2207 struct sge_qset *qs = cookie;
2208 struct adapter *adap = qs->netdev->priv;
2209 struct sge_rspq *q = &qs->rspq;
2210
2211 spin_lock(&q->lock);
2212 if (process_responses(adap, qs, -1) == 0)
2213 q->unhandled_irqs++;
2214 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2215 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2216 spin_unlock(&q->lock);
2217 return IRQ_HANDLED;
2218}
2219
2220/*
2221 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2222 * (i.e., response queue serviced by NAPI polling).
2223 */
2224irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2225{
2226 struct sge_qset *qs = cookie;
2227 struct adapter *adap = qs->netdev->priv;
2228 struct sge_rspq *q = &qs->rspq;
2229
2230 spin_lock(&q->lock);
4d22de3e
DLR
2231
2232 if (handle_responses(adap, q) < 0)
2233 q->unhandled_irqs++;
2234 spin_unlock(&q->lock);
2235 return IRQ_HANDLED;
2236}
2237
2238/*
2239 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2240 * SGE response queues as well as error and other async events as they all use
2241 * the same MSI vector. We use one SGE response queue per port in this mode
2242 * and protect all response queues with queue 0's lock.
2243 */
2244static irqreturn_t t3_intr_msi(int irq, void *cookie)
2245{
2246 int new_packets = 0;
2247 struct adapter *adap = cookie;
2248 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2249
2250 spin_lock(&q->lock);
2251
2252 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2253 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2254 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2255 new_packets = 1;
2256 }
2257
2258 if (adap->params.nports == 2 &&
2259 process_responses(adap, &adap->sge.qs[1], -1)) {
2260 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2261
2262 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2263 V_NEWTIMER(q1->next_holdoff) |
2264 V_NEWINDEX(q1->cidx));
2265 new_packets = 1;
2266 }
2267
2268 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2269 q->unhandled_irqs++;
2270
2271 spin_unlock(&q->lock);
2272 return IRQ_HANDLED;
2273}
2274
2275static int rspq_check_napi(struct net_device *dev, struct sge_rspq *q)
2276{
2277 if (!napi_is_scheduled(dev) && is_new_response(&q->desc[q->cidx], q)) {
2278 if (likely(__netif_rx_schedule_prep(dev)))
2279 __netif_rx_schedule(dev);
2280 return 1;
2281 }
2282 return 0;
2283}
2284
2285/*
2286 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2287 * by NAPI polling). Handles data events from SGE response queues as well as
2288 * error and other async events as they all use the same MSI vector. We use
2289 * one SGE response queue per port in this mode and protect all response
2290 * queues with queue 0's lock.
2291 */
2292irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2293{
2294 int new_packets;
2295 struct adapter *adap = cookie;
2296 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2297
2298 spin_lock(&q->lock);
2299
2300 new_packets = rspq_check_napi(adap->sge.qs[0].netdev, q);
2301 if (adap->params.nports == 2)
2302 new_packets += rspq_check_napi(adap->sge.qs[1].netdev,
2303 &adap->sge.qs[1].rspq);
2304 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2305 q->unhandled_irqs++;
2306
2307 spin_unlock(&q->lock);
2308 return IRQ_HANDLED;
2309}
2310
2311/*
2312 * A helper function that processes responses and issues GTS.
2313 */
2314static inline int process_responses_gts(struct adapter *adap,
2315 struct sge_rspq *rq)
2316{
2317 int work;
2318
2319 work = process_responses(adap, rspq_to_qset(rq), -1);
2320 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2321 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2322 return work;
2323}
2324
2325/*
2326 * The legacy INTx interrupt handler. This needs to handle data events from
2327 * SGE response queues as well as error and other async events as they all use
2328 * the same interrupt pin. We use one SGE response queue per port in this mode
2329 * and protect all response queues with queue 0's lock.
2330 */
2331static irqreturn_t t3_intr(int irq, void *cookie)
2332{
2333 int work_done, w0, w1;
2334 struct adapter *adap = cookie;
2335 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2336 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2337
2338 spin_lock(&q0->lock);
2339
2340 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2341 w1 = adap->params.nports == 2 &&
2342 is_new_response(&q1->desc[q1->cidx], q1);
2343
2344 if (likely(w0 | w1)) {
2345 t3_write_reg(adap, A_PL_CLI, 0);
2346 t3_read_reg(adap, A_PL_CLI); /* flush */
2347
2348 if (likely(w0))
2349 process_responses_gts(adap, q0);
2350
2351 if (w1)
2352 process_responses_gts(adap, q1);
2353
2354 work_done = w0 | w1;
2355 } else
2356 work_done = t3_slow_intr_handler(adap);
2357
2358 spin_unlock(&q0->lock);
2359 return IRQ_RETVAL(work_done != 0);
2360}
2361
2362/*
2363 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2364 * Handles data events from SGE response queues as well as error and other
2365 * async events as they all use the same interrupt pin. We use one SGE
2366 * response queue per port in this mode and protect all response queues with
2367 * queue 0's lock.
2368 */
2369static irqreturn_t t3b_intr(int irq, void *cookie)
2370{
2371 u32 map;
2372 struct adapter *adap = cookie;
2373 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2374
2375 t3_write_reg(adap, A_PL_CLI, 0);
2376 map = t3_read_reg(adap, A_SG_DATA_INTR);
2377
2378 if (unlikely(!map)) /* shared interrupt, most likely */
2379 return IRQ_NONE;
2380
2381 spin_lock(&q0->lock);
2382
2383 if (unlikely(map & F_ERRINTR))
2384 t3_slow_intr_handler(adap);
2385
2386 if (likely(map & 1))
2387 process_responses_gts(adap, q0);
2388
2389 if (map & 2)
2390 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2391
2392 spin_unlock(&q0->lock);
2393 return IRQ_HANDLED;
2394}
2395
2396/*
2397 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2398 * Handles data events from SGE response queues as well as error and other
2399 * async events as they all use the same interrupt pin. We use one SGE
2400 * response queue per port in this mode and protect all response queues with
2401 * queue 0's lock.
2402 */
2403static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2404{
2405 u32 map;
2406 struct net_device *dev;
2407 struct adapter *adap = cookie;
2408 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2409
2410 t3_write_reg(adap, A_PL_CLI, 0);
2411 map = t3_read_reg(adap, A_SG_DATA_INTR);
2412
2413 if (unlikely(!map)) /* shared interrupt, most likely */
2414 return IRQ_NONE;
2415
2416 spin_lock(&q0->lock);
2417
2418 if (unlikely(map & F_ERRINTR))
2419 t3_slow_intr_handler(adap);
2420
2421 if (likely(map & 1)) {
2422 dev = adap->sge.qs[0].netdev;
2423
4d22de3e
DLR
2424 if (likely(__netif_rx_schedule_prep(dev)))
2425 __netif_rx_schedule(dev);
2426 }
2427 if (map & 2) {
2428 dev = adap->sge.qs[1].netdev;
2429
4d22de3e
DLR
2430 if (likely(__netif_rx_schedule_prep(dev)))
2431 __netif_rx_schedule(dev);
2432 }
2433
2434 spin_unlock(&q0->lock);
2435 return IRQ_HANDLED;
2436}
2437
2438/**
2439 * t3_intr_handler - select the top-level interrupt handler
2440 * @adap: the adapter
2441 * @polling: whether using NAPI to service response queues
2442 *
2443 * Selects the top-level interrupt handler based on the type of interrupts
2444 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2445 * response queues.
2446 */
2447intr_handler_t t3_intr_handler(struct adapter *adap, int polling)
2448{
2449 if (adap->flags & USING_MSIX)
2450 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2451 if (adap->flags & USING_MSI)
2452 return polling ? t3_intr_msi_napi : t3_intr_msi;
2453 if (adap->params.rev > 0)
2454 return polling ? t3b_intr_napi : t3b_intr;
2455 return t3_intr;
2456}
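
/*
 * Illustrative sketch: how the handler selected above might be
 * registered for the single-vector (MSI or INTx) cases, where the
 * interrupt cookie is the adapter itself.  MSI-X registers one vector
 * per queue set with the queue set as cookie and is handled in the main
 * driver; the name string below is hypothetical and <linux/interrupt.h>
 * is assumed.
 */
static int example_request_irq(struct adapter *adap, int polling)
{
	unsigned long flags = (adap->flags & USING_MSI) ? 0 : IRQF_SHARED;

	return request_irq(adap->pdev->irq, t3_intr_handler(adap, polling),
			   flags, "cxgb3-example", adap);
}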
2457
2458/**
2459 * t3_sge_err_intr_handler - SGE async event interrupt handler
2460 * @adapter: the adapter
2461 *
2462 * Interrupt handler for SGE asynchronous (non-data) events.
2463 */
2464void t3_sge_err_intr_handler(struct adapter *adapter)
2465{
2466 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2467
2468 if (status & F_RSPQCREDITOVERFOW)
2469 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2470
2471 if (status & F_RSPQDISABLED) {
2472 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2473
2474 CH_ALERT(adapter,
2475 "packet delivered to disabled response queue "
2476 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2477 }
2478
2479 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2480 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
2481 t3_fatal_err(adapter);
2482}
2483
2484/**
2485 * sge_timer_cb - perform periodic maintenance of an SGE qset
2486 * @data: the SGE queue set to maintain
2487 *
2488 * Runs periodically from a timer to perform maintenance of an SGE queue
2489 * set. It performs two tasks:
2490 *
2491 * a) Cleans up any completed Tx descriptors that may still be pending.
2492 * Normal descriptor cleanup happens when new packets are added to a Tx
2493 * queue so this timer is relatively infrequent and does any cleanup only
2494 * if the Tx queue has not seen any new packets in a while. We make a
2495 * best effort attempt to reclaim descriptors, in that we don't wait
2496 * around if we cannot get a queue's lock (which most likely is because
2497 * someone else is queueing new packets and so will also handle the clean
2498 * up). Since control queues use immediate data exclusively we don't
2499 * bother cleaning them up here.
2500 *
2501 * b) Replenishes Rx queues that have run out due to memory shortage.
2502 * Normally new Rx buffers are added when existing ones are consumed but
2503 * when out of memory a queue can become empty. We try to add only a few
2504 * buffers here, the queue will be replenished fully as these new buffers
2505 * are used up if memory shortage has subsided.
2506 */
2507static void sge_timer_cb(unsigned long data)
2508{
2509 spinlock_t *lock;
2510 struct sge_qset *qs = (struct sge_qset *)data;
2511 struct adapter *adap = qs->netdev->priv;
2512
2513 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2514 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2515 spin_unlock(&qs->txq[TXQ_ETH].lock);
2516 }
2517 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2518 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2519 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2520 }
2521 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
e0994eb1 2522 &adap->sge.qs[0].rspq.lock;
4d22de3e
DLR
2523 if (spin_trylock_irq(lock)) {
2524 if (!napi_is_scheduled(qs->netdev)) {
bae73f44
DLR
2525 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2526
4d22de3e
DLR
2527 if (qs->fl[0].credits < qs->fl[0].size)
2528 __refill_fl(adap, &qs->fl[0]);
2529 if (qs->fl[1].credits < qs->fl[1].size)
2530 __refill_fl(adap, &qs->fl[1]);
bae73f44
DLR
2531
2532 if (status & (1 << qs->rspq.cntxt_id)) {
2533 qs->rspq.starved++;
2534 if (qs->rspq.credits) {
2535 refill_rspq(adap, &qs->rspq, 1);
2536 qs->rspq.credits--;
2537 qs->rspq.restarted++;
e0994eb1 2538 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
bae73f44
DLR
2539 1 << qs->rspq.cntxt_id);
2540 }
2541 }
4d22de3e
DLR
2542 }
2543 spin_unlock_irq(lock);
2544 }
2545 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2546}
2547
2548/**
2549 * t3_update_qset_coalesce - update coalescing settings for a queue set
2550 * @qs: the SGE queue set
2551 * @p: new queue set parameters
2552 *
2553 * Update the coalescing settings for an SGE queue set. Nothing is done
2554 * if the queue set is not initialized yet.
2555 */
2556void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2557{
2558 if (!qs->netdev)
2559 return;
2560
2561 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2562 qs->rspq.polling = p->polling;
2563 qs->netdev->poll = p->polling ? napi_rx_handler : ofld_poll;
2564}
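
/*
 * Illustrative sketch: the holdoff timer above is kept in 0.1 us ticks,
 * so an ethtool-style rx-usecs value maps to ticks by multiplying by 10
 * (with a minimum of one tick).  This hypothetical helper just updates
 * a qset_params copy and reapplies it.
 */
static void example_set_rx_coalesce(struct sge_qset *qs,
				    struct qset_params *p,
				    unsigned int rx_usecs)
{
	p->coalesce_usecs = rx_usecs;	/* e.g. 5 us -> holdoff_tmr of 50 */
	t3_update_qset_coalesce(qs, p);
}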
2565
2566/**
2567 * t3_sge_alloc_qset - initialize an SGE queue set
2568 * @adapter: the adapter
2569 * @id: the queue set id
2570 * @nports: how many Ethernet ports will be using this queue set
2571 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2572 * @p: configuration parameters for this queue set
2573 * @ntxq: number of Tx queues for the queue set
2574 * @netdev: net device associated with this queue set
2575 *
2576 * Allocate resources and initialize an SGE queue set. A queue set
2577 * comprises a response queue, two Rx free-buffer queues, and up to 3
2578 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2579 * queue, offload queue, and control queue.
2580 */
2581int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2582 int irq_vec_idx, const struct qset_params *p,
2583 int ntxq, struct net_device *netdev)
2584{
2585 int i, ret = -ENOMEM;
2586 struct sge_qset *q = &adapter->sge.qs[id];
2587
2588 init_qset_cntxt(q, id);
2589 init_timer(&q->tx_reclaim_timer);
2590 q->tx_reclaim_timer.data = (unsigned long)q;
2591 q->tx_reclaim_timer.function = sge_timer_cb;
2592
2593 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2594 sizeof(struct rx_desc),
2595 sizeof(struct rx_sw_desc),
2596 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2597 if (!q->fl[0].desc)
2598 goto err;
2599
2600 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2601 sizeof(struct rx_desc),
2602 sizeof(struct rx_sw_desc),
2603 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2604 if (!q->fl[1].desc)
2605 goto err;
2606
2607 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2608 sizeof(struct rsp_desc), 0,
2609 &q->rspq.phys_addr, NULL);
2610 if (!q->rspq.desc)
2611 goto err;
2612
2613 for (i = 0; i < ntxq; ++i) {
2614 /*
2615 * The control queue always uses immediate data so does not
2616 * need to keep track of any sk_buffs.
2617 */
2618 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2619
2620 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2621 sizeof(struct tx_desc), sz,
2622 &q->txq[i].phys_addr,
2623 &q->txq[i].sdesc);
2624 if (!q->txq[i].desc)
2625 goto err;
2626
2627 q->txq[i].gen = 1;
2628 q->txq[i].size = p->txq_size[i];
2629 spin_lock_init(&q->txq[i].lock);
2630 skb_queue_head_init(&q->txq[i].sendq);
2631 }
2632
2633 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2634 (unsigned long)q);
2635 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2636 (unsigned long)q);
2637
2638 q->fl[0].gen = q->fl[1].gen = 1;
2639 q->fl[0].size = p->fl_size;
2640 q->fl[1].size = p->jumbo_size;
2641
2642 q->rspq.gen = 1;
2643 q->rspq.size = p->rspq_size;
2644 spin_lock_init(&q->rspq.lock);
2645
2646 q->txq[TXQ_ETH].stop_thres = nports *
2647 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2648
cf992af5
DLR
2649#if FL0_PG_CHUNK_SIZE > 0
2650 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
e0994eb1 2651#else
cf992af5 2652 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
e0994eb1 2653#endif
cf992af5
DLR
2654 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2655 q->fl[1].buf_size = is_offload(adapter) ?
2656 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2657 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
4d22de3e
DLR
2658
2659 spin_lock(&adapter->sge.reg_lock);
2660
2661 /* FL threshold comparison uses < */
2662 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2663 q->rspq.phys_addr, q->rspq.size,
2664 q->fl[0].buf_size, 1, 0);
2665 if (ret)
2666 goto err_unlock;
2667
2668 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2669 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2670 q->fl[i].phys_addr, q->fl[i].size,
2671 q->fl[i].buf_size, p->cong_thres, 1,
2672 0);
2673 if (ret)
2674 goto err_unlock;
2675 }
2676
2677 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2678 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2679 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2680 1, 0);
2681 if (ret)
2682 goto err_unlock;
2683
2684 if (ntxq > 1) {
2685 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2686 USE_GTS, SGE_CNTXT_OFLD, id,
2687 q->txq[TXQ_OFLD].phys_addr,
2688 q->txq[TXQ_OFLD].size, 0, 1, 0);
2689 if (ret)
2690 goto err_unlock;
2691 }
2692
2693 if (ntxq > 2) {
2694 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2695 SGE_CNTXT_CTRL, id,
2696 q->txq[TXQ_CTRL].phys_addr,
2697 q->txq[TXQ_CTRL].size,
2698 q->txq[TXQ_CTRL].token, 1, 0);
2699 if (ret)
2700 goto err_unlock;
2701 }
2702
2703 spin_unlock(&adapter->sge.reg_lock);
2704 q->netdev = netdev;
2705 t3_update_qset_coalesce(q, p);
2706
2707 /*
 2708 * We use atalk_ptr as a backpointer to a qset. If a device is
 2709 * associated with multiple queue sets, only the first one sets
2710 * atalk_ptr.
2711 */
2712 if (netdev->atalk_ptr == NULL)
2713 netdev->atalk_ptr = q;
2714
2715 refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
2716 refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);
2717 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2718
2719 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2720 V_NEWTIMER(q->rspq.holdoff_tmr));
2721
2722 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2723 return 0;
2724
2725 err_unlock:
2726 spin_unlock(&adapter->sge.reg_lock);
2727 err:
2728 t3_free_qset(adapter, q);
2729 return ret;
2730}
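
/*
 * Illustrative sketch: one way a caller could bring up one queue set
 * per port using the parameters prepared by t3_sge_prep().  The
 * adap->params.sge.qset[] and adap->port[] accesses mirror structures
 * used elsewhere in the driver, but the IRQ vector choice and the error
 * unwinding here are simplified and hypothetical.
 */
static int example_setup_qsets(struct adapter *adap)
{
	int i, err;

	for (i = 0; i < adap->params.nports; ++i) {
		err = t3_sge_alloc_qset(adap, i, 1, i,
					&adap->params.sge.qset[i],
					3 /* ETH + OFLD + CTRL */,
					adap->port[i]);
		if (err) {
			t3_free_sge_resources(adap);
			return err;
		}
	}
	return 0;
}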
2731
2732/**
2733 * t3_free_sge_resources - free SGE resources
2734 * @adap: the adapter
2735 *
2736 * Frees resources used by the SGE queue sets.
2737 */
2738void t3_free_sge_resources(struct adapter *adap)
2739{
2740 int i;
2741
2742 for (i = 0; i < SGE_QSETS; ++i)
2743 t3_free_qset(adap, &adap->sge.qs[i]);
2744}
2745
2746/**
2747 * t3_sge_start - enable SGE
2748 * @adap: the adapter
2749 *
2750 * Enables the SGE for DMAs. This is the last step in starting packet
2751 * transfers.
2752 */
2753void t3_sge_start(struct adapter *adap)
2754{
2755 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2756}
2757
2758/**
2759 * t3_sge_stop - disable SGE operation
2760 * @adap: the adapter
2761 *
 2762 * Disables the DMA engine. This can be called in emergencies (e.g.,
2763 * from error interrupts) or from normal process context. In the latter
2764 * case it also disables any pending queue restart tasklets. Note that
2765 * if it is called in interrupt context it cannot disable the restart
 2766 * tasklets as it cannot wait; however, the tasklets will have no effect
2767 * since the doorbells are disabled and the driver will call this again
2768 * later from process context, at which time the tasklets will be stopped
2769 * if they are still running.
2770 */
2771void t3_sge_stop(struct adapter *adap)
2772{
2773 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
2774 if (!in_interrupt()) {
2775 int i;
2776
2777 for (i = 0; i < SGE_QSETS; ++i) {
2778 struct sge_qset *qs = &adap->sge.qs[i];
2779
2780 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
2781 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
2782 }
2783 }
2784}
2785
2786/**
2787 * t3_sge_init - initialize SGE
2788 * @adap: the adapter
2789 * @p: the SGE parameters
2790 *
2791 * Performs SGE initialization needed every time after a chip reset.
 2792 * We do not initialize any of the queue sets here; instead, the driver
2793 * top-level must request those individually. We also do not enable DMA
2794 * here, that should be done after the queues have been set up.
2795 */
2796void t3_sge_init(struct adapter *adap, struct sge_params *p)
2797{
2798 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
2799
2800 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
2801 F_CQCRDTCTRL |
2802 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
2803 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
2804#if SGE_NUM_GENBITS == 1
2805 ctrl |= F_EGRGENCTRL;
2806#endif
2807 if (adap->params.rev > 0) {
2808 if (!(adap->flags & (USING_MSIX | USING_MSI)))
2809 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
2810 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
2811 }
2812 t3_write_reg(adap, A_SG_CONTROL, ctrl);
2813 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
2814 V_LORCQDRBTHRSH(512));
2815 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
2816 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
6195c71d 2817 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
4d22de3e
DLR
2818 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
2819 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
2820 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
2821 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
2822 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
2823 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
2824}
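
/*
 * Illustrative sketch: the stop/init/start bracket a caller might place
 * around a chip reset.  The reset itself is a placeholder; the real
 * sequence, including re-allocating queue sets, lives in the main
 * driver.
 */
static void example_sge_restart(struct adapter *adap, struct sge_params *p)
{
	t3_sge_stop(adap);
	/* ... chip reset and queue set re-allocation would go here ... */
	t3_sge_init(adap, p);
	t3_sge_start(adap);
}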
2825
2826/**
2827 * t3_sge_prep - one-time SGE initialization
2828 * @adap: the associated adapter
2829 * @p: SGE parameters
2830 *
2831 * Performs one-time initialization of SGE SW state. Includes determining
2832 * defaults for the assorted SGE parameters, which admins can change until
2833 * they are used to initialize the SGE.
2834 */
2835void __devinit t3_sge_prep(struct adapter *adap, struct sge_params *p)
2836{
2837 int i;
2838
2839 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
2840 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2841
2842 for (i = 0; i < SGE_QSETS; ++i) {
2843 struct qset_params *q = p->qset + i;
2844
2845 q->polling = adap->params.rev > 0;
2846 q->coalesce_usecs = 5;
2847 q->rspq_size = 1024;
e0994eb1 2848 q->fl_size = 1024;
4d22de3e
DLR
2849 q->jumbo_size = 512;
2850 q->txq_size[TXQ_ETH] = 1024;
2851 q->txq_size[TXQ_OFLD] = 1024;
2852 q->txq_size[TXQ_CTRL] = 256;
2853 q->cong_thres = 0;
2854 }
2855
2856 spin_lock_init(&adap->sge.reg_lock);
2857}
2858
2859/**
2860 * t3_get_desc - dump an SGE descriptor for debugging purposes
2861 * @qs: the queue set
2862 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
2863 * @idx: the descriptor index in the queue
2864 * @data: where to dump the descriptor contents
2865 *
2866 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2867 * size of the descriptor.
2868 */
2869int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2870 unsigned char *data)
2871{
2872 if (qnum >= 6)
2873 return -EINVAL;
2874
2875 if (qnum < 3) {
2876 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2877 return -EINVAL;
2878 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2879 return sizeof(struct tx_desc);
2880 }
2881
2882 if (qnum == 3) {
2883 if (!qs->rspq.desc || idx >= qs->rspq.size)
2884 return -EINVAL;
2885 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2886 return sizeof(struct rsp_desc);
2887 }
2888
2889 qnum -= 4;
2890 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2891 return -EINVAL;
2892 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2893 return sizeof(struct rx_desc);
2894}
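
/*
 * Illustrative sketch: dumping one response-queue descriptor (qnum 3)
 * with t3_get_desc().  The buffer is sized for the descriptor type
 * being requested; the qs pointer would come from the adapter's
 * sge.qs[] array.
 */
static void example_dump_rsp_desc(const struct sge_qset *qs, unsigned int idx)
{
	unsigned char buf[sizeof(struct rsp_desc)];
	int i, len = t3_get_desc(qs, 3, idx, buf);

	if (len < 0)
		return;

	printk(KERN_DEBUG "rspq desc %u:", idx);
	for (i = 0; i < len; i++)
		printk(" %02x", buf[i]);
	printk("\n");
}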