/*
 * This file is part of the Chelsio T4 Ethernet driver for Linux.
 *
 * Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/crc32.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/etherdevice.h>
#include <linux/firmware.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/mdio.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/rtnetlink.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/sockios.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <net/neighbour.h>
#include <net/netevent.h>
#include <asm/uaccess.h>

#include "cxgb4.h"
#include "t4_regs.h"
#include "t4_msg.h"
#include "t4fw_api.h"
#include "l2t.h"

#define DRV_VERSION "1.3.0-ko"
#define DRV_DESC "Chelsio T4 Network Driver"

/*
 * Max interrupt hold-off timer value in us.  Queues fall back to this value
 * under extreme memory pressure so it's largish to give the system time to
 * recover.
 */
#define MAX_SGE_TIMERVAL 200U

#ifdef CONFIG_PCI_IOV
/*
 * Virtual Function provisioning constants.  We need two extra Ingress Queues
 * with Interrupt capability to serve as the VF's Firmware Event Queue and
 * Forwarded Interrupt Queue (when using MSI mode) -- neither will have Free
 * Lists associated with them.  For each Ethernet/Control Egress Queue and
 * for each Free List, we need an Egress Context.
 */
enum {
	VFRES_NPORTS = 1,		/* # of "ports" per VF */
	VFRES_NQSETS = 2,		/* # of "Queue Sets" per VF */

	VFRES_NVI = VFRES_NPORTS,	/* # of Virtual Interfaces */
	VFRES_NETHCTRL = VFRES_NQSETS,	/* # of EQs used for ETH or CTRL Qs */
	VFRES_NIQFLINT = VFRES_NQSETS+2,/* # of ingress Qs/w Free List(s)/intr */
	VFRES_NIQ = 0,			/* # of non-fl/int ingress queues */
	VFRES_NEQ = VFRES_NQSETS*2,	/* # of egress queues */
	VFRES_TC = 0,			/* PCI-E traffic class */
	VFRES_NEXACTF = 16,		/* # of exact MPS filters */

	VFRES_R_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF|FW_CMD_CAP_PORT,
	VFRES_WX_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF,
};

/*
 * Provide a Port Access Rights Mask for the specified PF/VF.  This is very
 * static and likely not to be useful in the long run.  We really need to
 * implement some form of persistent configuration which the firmware
 * controls.
 */
static unsigned int pfvfres_pmask(struct adapter *adapter,
				  unsigned int pf, unsigned int vf)
{
	unsigned int portn, portvec;

	/*
	 * Give PF's access to all of the ports.
	 */
	if (vf == 0)
		return FW_PFVF_CMD_PMASK_MASK;

	/*
	 * For VFs, we'll assign them access to the ports based purely on the
	 * PF.  We assign active ports in order, wrapping around if there are
	 * fewer active ports than PFs: e.g. active port[pf % nports].
	 * Unfortunately the adapter's port_info structs haven't been
	 * initialized yet so we have to compute this.
	 */
	if (adapter->params.nports == 0)
		return 0;

	portn = pf % adapter->params.nports;
	portvec = adapter->params.portvec;
	do {
		/*
		 * Isolate the lowest set bit in the port vector.  If we're at
		 * the port number that we want, return that as the pmask.
		 * Otherwise mask that bit out of the port vector and
		 * decrement our port number ...
		 */
		unsigned int pmask = portvec ^ (portvec & (portvec-1));
		if (portn == 0)
			return pmask;
		portn--;
		portvec &= ~pmask;
	} while (portvec != 0);
	return 0;
}
#endif

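/*
 * pfvfres_pmask() above relies on portvec ^ (portvec & (portvec - 1)) to
 * isolate the lowest set bit of the port vector.  For example, with
 * portvec = 0xb (ports 0, 1 and 3 active), successive iterations yield
 * pmask values 0x1, 0x2 and 0x8.
 */
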
enum {
	MEMWIN0_APERTURE = 65536,
	MEMWIN0_BASE     = 0x30000,
	MEMWIN1_APERTURE = 32768,
	MEMWIN1_BASE     = 0x28000,
	MEMWIN2_APERTURE = 2048,
	MEMWIN2_BASE     = 0x1b800,
};

enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MIN_TXQ_ENTRIES      = 32,
	MIN_CTRL_TXQ_ENTRIES = 32,
	MIN_RSPQ_ENTRIES     = 128,
	MIN_FL_ENTRIES       = 16
};

#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)

#define CH_DEVICE(devid, data) { PCI_VDEVICE(CHELSIO, devid), (data) }

static DEFINE_PCI_DEVICE_TABLE(cxgb4_pci_tbl) = {
	CH_DEVICE(0xa000, 0),  /* PE10K */
	CH_DEVICE(0x4001, 0),
	CH_DEVICE(0x4002, 0),
	CH_DEVICE(0x4003, 0),
	CH_DEVICE(0x4004, 0),
	CH_DEVICE(0x4005, 0),
	CH_DEVICE(0x4006, 0),
	CH_DEVICE(0x4007, 0),
	CH_DEVICE(0x4008, 0),
	CH_DEVICE(0x4009, 0),
	CH_DEVICE(0x400a, 0),
	{ 0, }
};

#define FW_FNAME "cxgb4/t4fw.bin"

MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("Chelsio Communications");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl);
MODULE_FIRMWARE(FW_FNAME);

static int dflt_msg_enable = DFLT_MSG_ENABLE;

module_param(dflt_msg_enable, int, 0644);
MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap");

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy INTx interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and INTx interrupts
 * msi = 0: force INTx interrupts
 */
static int msi = 2;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use INTx (0), MSI (1) or MSI-X (2)");

/*
 * Queue interrupt hold-off timer values.  Queues default to the first of these
 * upon creation.
 */
static unsigned int intr_holdoff[SGE_NTIMERS - 1] = { 5, 10, 20, 50, 100 };

module_param_array(intr_holdoff, uint, NULL, 0644);
MODULE_PARM_DESC(intr_holdoff, "values for queue interrupt hold-off timers "
		 "0..4 in microseconds");

static unsigned int intr_cnt[SGE_NCOUNTERS - 1] = { 4, 8, 16 };

module_param_array(intr_cnt, uint, NULL, 0644);
MODULE_PARM_DESC(intr_cnt,
		 "thresholds 1..3 for queue interrupt packet counters");

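/*
 * Both arrays can be overridden at module load time with comma-separated
 * values, e.g. (the values here are illustrative only):
 *
 *	modprobe cxgb4 intr_holdoff=3,15,30,60,120 intr_cnt=2,16,32
 */
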
static int vf_acls;

#ifdef CONFIG_PCI_IOV
module_param(vf_acls, bool, 0644);
MODULE_PARM_DESC(vf_acls, "if set enable virtualization L2 ACL enforcement");

static unsigned int num_vf[4];

module_param_array(num_vf, uint, NULL, 0644);
MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3");
#endif

static struct dentry *cxgb4_debugfs_root;

static LIST_HEAD(adapter_list);
static DEFINE_MUTEX(uld_mutex);
static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
static const char *uld_str[] = { "RDMA", "iSCSI" };

static void link_report(struct net_device *dev)
{
	if (!netif_carrier_ok(dev))
		netdev_info(dev, "link down\n");
	else {
		static const char *fc[] = { "no", "Rx", "Tx", "Tx/Rx" };

		const char *s = "10Mbps";
		const struct port_info *p = netdev_priv(dev);

		switch (p->link_cfg.speed) {
		case SPEED_10000:
			s = "10Gbps";
			break;
		case SPEED_1000:
			s = "1000Mbps";
			break;
		case SPEED_100:
			s = "100Mbps";
			break;
		}

		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s,
			    fc[p->link_cfg.fc]);
	}
}

void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
{
	struct net_device *dev = adapter->port[port_id];

	/* Skip changes from disabled ports. */
	if (netif_running(dev) && link_stat != netif_carrier_ok(dev)) {
		if (link_stat)
			netif_carrier_on(dev);
		else
			netif_carrier_off(dev);

		link_report(dev);
	}
}

void t4_os_portmod_changed(const struct adapter *adap, int port_id)
{
	static const char *mod_str[] = {
		NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
	};

	const struct net_device *dev = adap->port[port_id];
	const struct port_info *pi = netdev_priv(dev);

	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
		netdev_info(dev, "port module unplugged\n");
	else if (pi->mod_type < ARRAY_SIZE(mod_str))
		netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]);
}

/*
 * Configure the exact and hash address filters to handle a port's multicast
 * and secondary unicast MAC addresses.
 */
static int set_addr_filters(const struct net_device *dev, bool sleep)
{
	u64 mhash = 0;
	u64 uhash = 0;
	bool free = true;
	u16 filt_idx[7];
	const u8 *addr[7];
	int ret, naddr = 0;
	const struct netdev_hw_addr *ha;
	int uc_cnt = netdev_uc_count(dev);
	int mc_cnt = netdev_mc_count(dev);
	const struct port_info *pi = netdev_priv(dev);
	unsigned int mb = pi->adapter->fn;

	/* first do the secondary unicast addresses */
	netdev_for_each_uc_addr(ha, dev) {
		addr[naddr++] = ha->addr;
		if (--uc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
			ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
					naddr, addr, filt_idx, &uhash, sleep);
			if (ret < 0)
				return ret;

			free = false;
			naddr = 0;
		}
	}

	/* next set up the multicast addresses */
	netdev_for_each_mc_addr(ha, dev) {
		addr[naddr++] = ha->addr;
		if (--mc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
			ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
					naddr, addr, filt_idx, &mhash, sleep);
			if (ret < 0)
				return ret;

			free = false;
			naddr = 0;
		}
	}

	return t4_set_addr_hash(pi->adapter, mb, pi->viid, uhash != 0,
				uhash | mhash, sleep);
}

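/*
 * Note that set_addr_filters() batches at most ARRAY_SIZE(addr) == 7
 * addresses per t4_alloc_mac_filt() call; addresses for which no exact-match
 * filter could be allocated are folded into the unicast/multicast hash
 * filters accumulated in uhash and mhash and programmed at the end via
 * t4_set_addr_hash().
 */
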
/*
 * Set Rx properties of a port, such as promiscuity, address filters, and MTU.
 * If @mtu is -1 it is left unchanged.
 */
static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);

	ret = set_addr_filters(dev, sleep_ok);
	if (ret == 0)
		ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, mtu,
				    (dev->flags & IFF_PROMISC) ? 1 : 0,
				    (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1,
				    sleep_ok);
	return ret;
}

/**
 *	link_start - enable a port
 *	@dev: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static int link_start(struct net_device *dev)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);
	unsigned int mb = pi->adapter->fn;

	/*
	 * We do not set address filters and promiscuity here, the stack does
	 * that step explicitly.
	 */
	ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1,
			    pi->vlan_grp != NULL, true);
	if (ret == 0) {
		ret = t4_change_mac(pi->adapter, mb, pi->viid,
				    pi->xact_addr_filt, dev->dev_addr, true,
				    true);
		if (ret >= 0) {
			pi->xact_addr_filt = ret;
			ret = 0;
		}
	}
	if (ret == 0)
		ret = t4_link_start(pi->adapter, mb, pi->tx_chan,
				    &pi->link_cfg);
	if (ret == 0)
		ret = t4_enable_vi(pi->adapter, mb, pi->viid, true, true);
	return ret;
}

/*
 * Response queue handler for the FW event queue.
 */
static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
			  const struct pkt_gl *gl)
{
	u8 opcode = ((const struct rss_header *)rsp)->opcode;

	rsp++;                                          /* skip RSS header */
	if (likely(opcode == CPL_SGE_EGR_UPDATE)) {
		const struct cpl_sge_egr_update *p = (void *)rsp;
		unsigned int qid = EGR_QID(ntohl(p->opcode_qid));
		struct sge_txq *txq = q->adap->sge.egr_map[qid];

		txq->restarts++;
		if ((u8 *)txq < (u8 *)q->adap->sge.ethrxq) {
			struct sge_eth_txq *eq;

			eq = container_of(txq, struct sge_eth_txq, q);
			netif_tx_wake_queue(eq->txq);
		} else {
			struct sge_ofld_txq *oq;

			oq = container_of(txq, struct sge_ofld_txq, q);
			tasklet_schedule(&oq->qresume_tsk);
		}
	} else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
		const struct cpl_fw6_msg *p = (void *)rsp;

		if (p->type == 0)
			t4_handle_fw_rpl(q->adap, p->data);
	} else if (opcode == CPL_L2T_WRITE_RPL) {
		const struct cpl_l2t_write_rpl *p = (void *)rsp;

		do_l2t_write_rpl(q->adap, p);
	} else
		dev_err(q->adap->pdev_dev,
			"unexpected CPL %#x on FW event queue\n", opcode);
	return 0;
}

/**
 *	uldrx_handler - response queue handler for ULD queues
 *	@q: the response queue that received the packet
 *	@rsp: the response queue descriptor holding the offload message
 *	@gl: the gather list of packet fragments
 *
 *	Deliver an ingress offload packet to a ULD.  All processing is done by
 *	the ULD, we just maintain statistics.
 */
static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
			 const struct pkt_gl *gl)
{
	struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);

	if (ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], rsp, gl)) {
		rxq->stats.nomem++;
		return -1;
	}
	if (gl == NULL)
		rxq->stats.imm++;
	else if (gl == CXGB4_MSG_AN)
		rxq->stats.an++;
	else
		rxq->stats.pkts++;
	return 0;
}

static void disable_msi(struct adapter *adapter)
{
	if (adapter->flags & USING_MSIX) {
		pci_disable_msix(adapter->pdev);
		adapter->flags &= ~USING_MSIX;
	} else if (adapter->flags & USING_MSI) {
		pci_disable_msi(adapter->pdev);
		adapter->flags &= ~USING_MSI;
	}
}

/*
 * Interrupt handler for non-data events used with MSI-X.
 */
static irqreturn_t t4_nondata_intr(int irq, void *cookie)
{
	struct adapter *adap = cookie;

	u32 v = t4_read_reg(adap, MYPF_REG(PL_PF_INT_CAUSE));
	if (v & PFSW) {
		adap->swintr = 1;
		t4_write_reg(adap, MYPF_REG(PL_PF_INT_CAUSE), v);
	}
	t4_slow_intr_handler(adap);
	return IRQ_HANDLED;
}

/*
 * Name the MSI-X interrupts.
 */
static void name_msix_vecs(struct adapter *adap)
{
	int i, j, msi_idx = 2, n = sizeof(adap->msix_info[0].desc) - 1;

	/* non-data interrupts */
	snprintf(adap->msix_info[0].desc, n, "%s", adap->name);
	adap->msix_info[0].desc[n] = 0;

	/* FW events */
	snprintf(adap->msix_info[1].desc, n, "%s-FWeventq", adap->name);
	adap->msix_info[1].desc[n] = 0;

	/* Ethernet queues */
	for_each_port(adap, j) {
		struct net_device *d = adap->port[j];
		const struct port_info *pi = netdev_priv(d);

		for (i = 0; i < pi->nqsets; i++, msi_idx++) {
			snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
				 d->name, i);
			adap->msix_info[msi_idx].desc[n] = 0;
		}
	}

	for_each_ofldrxq(&adap->sge, i) {
		snprintf(adap->msix_info[msi_idx].desc, n, "%s-ofld%d",
			 adap->name, i);
		adap->msix_info[msi_idx++].desc[n] = 0;
	}
	for_each_rdmarxq(&adap->sge, i) {
		snprintf(adap->msix_info[msi_idx].desc, n, "%s-rdma%d",
			 adap->name, i);
		adap->msix_info[msi_idx++].desc[n] = 0;
	}
}

static int request_msix_queue_irqs(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, msi = 2;

	err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
			  adap->msix_info[1].desc, &s->fw_evtq);
	if (err)
		return err;

	for_each_ethrxq(s, ethqidx) {
		err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
				  adap->msix_info[msi].desc,
				  &s->ethrxq[ethqidx].rspq);
		if (err)
			goto unwind;
		msi++;
	}
	for_each_ofldrxq(s, ofldqidx) {
		err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
				  adap->msix_info[msi].desc,
				  &s->ofldrxq[ofldqidx].rspq);
		if (err)
			goto unwind;
		msi++;
	}
	for_each_rdmarxq(s, rdmaqidx) {
		err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
				  adap->msix_info[msi].desc,
				  &s->rdmarxq[rdmaqidx].rspq);
		if (err)
			goto unwind;
		msi++;
	}
	return 0;

unwind:
	while (--rdmaqidx >= 0)
		free_irq(adap->msix_info[--msi].vec,
			 &s->rdmarxq[rdmaqidx].rspq);
	while (--ofldqidx >= 0)
		free_irq(adap->msix_info[--msi].vec,
			 &s->ofldrxq[ofldqidx].rspq);
	while (--ethqidx >= 0)
		free_irq(adap->msix_info[--msi].vec, &s->ethrxq[ethqidx].rspq);
	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
	return err;
}

static void free_msix_queue_irqs(struct adapter *adap)
{
	int i, msi = 2;
	struct sge *s = &adap->sge;

	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
	for_each_ethrxq(s, i)
		free_irq(adap->msix_info[msi++].vec, &s->ethrxq[i].rspq);
	for_each_ofldrxq(s, i)
		free_irq(adap->msix_info[msi++].vec, &s->ofldrxq[i].rspq);
	for_each_rdmarxq(s, i)
		free_irq(adap->msix_info[msi++].vec, &s->rdmarxq[i].rspq);
}

/**
 *	write_rss - write the RSS table for a given port
 *	@pi: the port
 *	@queues: array of queue indices for RSS
 *
 *	Sets up the portion of the HW RSS table for the port's VI to distribute
 *	packets to the Rx queues in @queues.
 */
static int write_rss(const struct port_info *pi, const u16 *queues)
{
	u16 *rss;
	int i, err;
	const struct sge_eth_rxq *q = &pi->adapter->sge.ethrxq[pi->first_qset];

	rss = kmalloc(pi->rss_size * sizeof(u16), GFP_KERNEL);
	if (!rss)
		return -ENOMEM;

	/* map the queue indices to queue ids */
	for (i = 0; i < pi->rss_size; i++, queues++)
		rss[i] = q[*queues].rspq.abs_id;

	err = t4_config_rss_range(pi->adapter, pi->adapter->fn, pi->viid, 0,
				  pi->rss_size, rss, pi->rss_size);
	kfree(rss);
	return err;
}

/**
 *	setup_rss - configure RSS
 *	@adap: the adapter
 *
 *	Sets up RSS for each port.
 */
static int setup_rss(struct adapter *adap)
{
	int i, err;

	for_each_port(adap, i) {
		const struct port_info *pi = adap2pinfo(adap, i);

		err = write_rss(pi, pi->rss);
		if (err)
			return err;
	}
	return 0;
}

/*
 * Wait until all NAPI handlers are descheduled.
 */
static void quiesce_rx(struct adapter *adap)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
		struct sge_rspq *q = adap->sge.ingr_map[i];

		if (q && q->handler)
			napi_disable(&q->napi);
	}
}

/*
 * Enable NAPI scheduling and interrupt generation for all Rx queues.
 */
static void enable_rx(struct adapter *adap)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
		struct sge_rspq *q = adap->sge.ingr_map[i];

		if (!q)
			continue;
		if (q->handler)
			napi_enable(&q->napi);
		/* 0-increment GTS to start the timer and enable interrupts */
		t4_write_reg(adap, MYPF_REG(SGE_PF_GTS),
			     SEINTARM(q->intr_params) |
			     INGRESSQID(q->cntxt_id));
	}
}

/**
 *	setup_sge_queues - configure SGE Tx/Rx/response queues
 *	@adap: the adapter
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int setup_sge_queues(struct adapter *adap)
{
	int err, msi_idx, i, j;
	struct sge *s = &adap->sge;

	bitmap_zero(s->starving_fl, MAX_EGRQ);
	bitmap_zero(s->txq_maperr, MAX_EGRQ);

	if (adap->flags & USING_MSIX)
		msi_idx = 1;         /* vector 0 is for non-queue interrupts */
	else {
		err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
				       NULL, NULL);
		if (err)
			return err;
		msi_idx = -((int)s->intrq.abs_id + 1);
	}

	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
			       msi_idx, NULL, fwevtq_handler);
	if (err) {
freeout:	t4_free_sge_resources(adap);
		return err;
	}

	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];
		struct port_info *pi = netdev_priv(dev);
		struct sge_eth_rxq *q = &s->ethrxq[pi->first_qset];
		struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];

		for (j = 0; j < pi->nqsets; j++, q++) {
			if (msi_idx > 0)
				msi_idx++;
			err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
					       msi_idx, &q->fl,
					       t4_ethrx_handler);
			if (err)
				goto freeout;
			q->rspq.idx = j;
			memset(&q->stats, 0, sizeof(q->stats));
		}
		for (j = 0; j < pi->nqsets; j++, t++) {
			err = t4_sge_alloc_eth_txq(adap, t, dev,
					netdev_get_tx_queue(dev, j),
					s->fw_evtq.cntxt_id);
			if (err)
				goto freeout;
		}
	}

	j = s->ofldqsets / adap->params.nports; /* ofld queues per channel */
	for_each_ofldrxq(s, i) {
		struct sge_ofld_rxq *q = &s->ofldrxq[i];
		struct net_device *dev = adap->port[i / j];

		if (msi_idx > 0)
			msi_idx++;
		err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx,
				       &q->fl, uldrx_handler);
		if (err)
			goto freeout;
		memset(&q->stats, 0, sizeof(q->stats));
		s->ofld_rxq[i] = q->rspq.abs_id;
		err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], dev,
					    s->fw_evtq.cntxt_id);
		if (err)
			goto freeout;
	}

	for_each_rdmarxq(s, i) {
		struct sge_ofld_rxq *q = &s->rdmarxq[i];

		if (msi_idx > 0)
			msi_idx++;
		err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
				       msi_idx, &q->fl, uldrx_handler);
		if (err)
			goto freeout;
		memset(&q->stats, 0, sizeof(q->stats));
		s->rdma_rxq[i] = q->rspq.abs_id;
	}

	for_each_port(adap, i) {
		/*
		 * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
		 * have RDMA queues, and that's the right value.
		 */
		err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
					    s->fw_evtq.cntxt_id,
					    s->rdmarxq[i].rspq.cntxt_id);
		if (err)
			goto freeout;
	}

	t4_write_reg(adap, MPS_TRC_RSS_CONTROL,
		     RSSCONTROL(netdev2pinfo(adap->port[0])->tx_chan) |
		     QUEUENUMBER(s->ethrxq[0].rspq.abs_id));
	return 0;
}

/*
 * Returns 0 if new FW was successfully loaded, a positive errno if a load was
 * started but failed, and a negative errno if flash load couldn't start.
 */
static int upgrade_fw(struct adapter *adap)
{
	int ret;
	u32 vers;
	const struct fw_hdr *hdr;
	const struct firmware *fw;
	struct device *dev = adap->pdev_dev;

	ret = request_firmware(&fw, FW_FNAME, dev);
	if (ret < 0) {
		dev_err(dev, "unable to load firmware image " FW_FNAME
			", error %d\n", ret);
		return ret;
	}

	hdr = (const struct fw_hdr *)fw->data;
	vers = ntohl(hdr->fw_ver);
	if (FW_HDR_FW_VER_MAJOR_GET(vers) != FW_VERSION_MAJOR) {
		ret = -EINVAL;              /* wrong major version, won't do */
		goto out;
	}

	/*
	 * If the flash FW is unusable or we found something newer, load it.
	 */
	if (FW_HDR_FW_VER_MAJOR_GET(adap->params.fw_vers) != FW_VERSION_MAJOR ||
	    vers > adap->params.fw_vers) {
		ret = -t4_load_fw(adap, fw->data, fw->size);
		if (!ret)
			dev_info(dev, "firmware upgraded to version %pI4 from "
				 FW_FNAME "\n", &hdr->fw_ver);
	}
out:	release_firmware(fw);
	return ret;
}

/*
 * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
 * The allocated memory is cleared.
 */
void *t4_alloc_mem(size_t size)
{
	void *p = kmalloc(size, GFP_KERNEL);

	if (!p)
		p = vmalloc(size);
	if (p)
		memset(p, 0, size);
	return p;
}

/*
 * Free memory allocated through alloc_mem().
 */
void t4_free_mem(void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}

static inline int is_offload(const struct adapter *adap)
{
	return adap->params.offload;
}

/*
 * Implementation of ethtool operations.
 */

static u32 get_msglevel(struct net_device *dev)
{
	return netdev2adap(dev)->msg_enable;
}

static void set_msglevel(struct net_device *dev, u32 val)
{
	netdev2adap(dev)->msg_enable = val;
}

static char stats_strings[][ETH_GSTRING_LEN] = {
	"TxOctetsOK         ",
	"TxFramesOK         ",
	"TxBroadcastFrames  ",
	"TxMulticastFrames  ",
	"TxUnicastFrames    ",
	"TxErrorFrames      ",

	"TxFrames64         ",
	"TxFrames65To127    ",
	"TxFrames128To255   ",
	"TxFrames256To511   ",
	"TxFrames512To1023  ",
	"TxFrames1024To1518 ",
	"TxFrames1519ToMax  ",

	"TxFramesDropped    ",
	"TxPauseFrames      ",
	"TxPPP0Frames       ",
	"TxPPP1Frames       ",
	"TxPPP2Frames       ",
	"TxPPP3Frames       ",
	"TxPPP4Frames       ",
	"TxPPP5Frames       ",
	"TxPPP6Frames       ",
	"TxPPP7Frames       ",

	"RxOctetsOK         ",
	"RxFramesOK         ",
	"RxBroadcastFrames  ",
	"RxMulticastFrames  ",
	"RxUnicastFrames    ",

	"RxFramesTooLong    ",
	"RxJabberErrors     ",
	"RxFCSErrors        ",
	"RxLengthErrors     ",
	"RxSymbolErrors     ",
	"RxRuntFrames       ",

	"RxFrames64         ",
	"RxFrames65To127    ",
	"RxFrames128To255   ",
	"RxFrames256To511   ",
	"RxFrames512To1023  ",
	"RxFrames1024To1518 ",
	"RxFrames1519ToMax  ",

	"RxPauseFrames      ",
	"RxPPP0Frames       ",
	"RxPPP1Frames       ",
	"RxPPP2Frames       ",
	"RxPPP3Frames       ",
	"RxPPP4Frames       ",
	"RxPPP5Frames       ",
	"RxPPP6Frames       ",
	"RxPPP7Frames       ",

	"RxBG0FramesDropped ",
	"RxBG1FramesDropped ",
	"RxBG2FramesDropped ",
	"RxBG3FramesDropped ",
	"RxBG0FramesTrunc   ",
	"RxBG1FramesTrunc   ",
	"RxBG2FramesTrunc   ",
	"RxBG3FramesTrunc   ",

	"TSO                ",
	"TxCsumOffload      ",
	"RxCsumGood         ",
	"VLANextractions    ",
	"VLANinsertions     ",
	"GROpackets         ",
	"GROmerged          ",
};

static int get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(stats_strings);
	default:
		return -EOPNOTSUPP;
	}
}

#define T4_REGMAP_SIZE (160 * 1024)

static int get_regs_len(struct net_device *dev)
{
	return T4_REGMAP_SIZE;
}

static int get_eeprom_len(struct net_device *dev)
{
	return EEPROMSIZE;
}

static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
	struct adapter *adapter = netdev2adap(dev);

	strcpy(info->driver, KBUILD_MODNAME);
	strcpy(info->version, DRV_VERSION);
	strcpy(info->bus_info, pci_name(adapter->pdev));

	if (!adapter->params.fw_vers)
		strcpy(info->fw_version, "N/A");
	else
		snprintf(info->fw_version, sizeof(info->fw_version),
			"%u.%u.%u.%u, TP %u.%u.%u.%u",
			FW_HDR_FW_VER_MAJOR_GET(adapter->params.fw_vers),
			FW_HDR_FW_VER_MINOR_GET(adapter->params.fw_vers),
			FW_HDR_FW_VER_MICRO_GET(adapter->params.fw_vers),
			FW_HDR_FW_VER_BUILD_GET(adapter->params.fw_vers),
			FW_HDR_FW_VER_MAJOR_GET(adapter->params.tp_vers),
			FW_HDR_FW_VER_MINOR_GET(adapter->params.tp_vers),
			FW_HDR_FW_VER_MICRO_GET(adapter->params.tp_vers),
			FW_HDR_FW_VER_BUILD_GET(adapter->params.tp_vers));
}

static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	if (stringset == ETH_SS_STATS)
		memcpy(data, stats_strings, sizeof(stats_strings));
}

1011 static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
1013 if (stringset == ETH_SS_STATS)
1014 memcpy(data, stats_strings, sizeof(stats_strings));
/*
 * port stats maintained per queue of the port.  They should be in the same
 * order as in stats_strings above.
 */
struct queue_port_stats {
	u64 tso;
	u64 tx_csum;
	u64 rx_csum;
	u64 vlan_ex;
	u64 vlan_ins;
	u64 gro_pkts;
	u64 gro_merged;
};

static void collect_sge_port_stats(const struct adapter *adap,
		const struct port_info *p, struct queue_port_stats *s)
{
	int i;
	const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset];
	const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset];

	memset(s, 0, sizeof(*s));
	for (i = 0; i < p->nqsets; i++, rx++, tx++) {
		s->tso += tx->tso;
		s->tx_csum += tx->tx_cso;
		s->rx_csum += rx->stats.rx_cso;
		s->vlan_ex += rx->stats.vlan_ex;
		s->vlan_ins += tx->vlan_ins;
		s->gro_pkts += rx->stats.lro_pkts;
		s->gro_merged += rx->stats.lro_merged;
	}
}

static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
		      u64 *data)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	t4_get_port_stats(adapter, pi->tx_chan, (struct port_stats *)data);

	data += sizeof(struct port_stats) / sizeof(u64);
	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
}

/*
 * Return a version number to identify the type of adapter.  The scheme is:
 * - bits 0..9: chip version
 * - bits 10..15: chip revision
 * - bits 16..23: register dump version
 */
static inline unsigned int mk_adap_vers(const struct adapter *ap)
{
	return 4 | (ap->params.rev << 10) | (1 << 16);
}

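/*
 * For example, a T4 (chip version 4) rev-2 adapter yields
 * 4 | (2 << 10) | (1 << 16) == 0x10804.
 */
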
static void reg_block_dump(struct adapter *ap, void *buf, unsigned int start,
			   unsigned int end)
{
	u32 *p = buf + start;

	for ( ; start <= end; start += sizeof(u32))
		*p++ = t4_read_reg(ap, start);
}

static void get_regs(struct net_device *dev, struct ethtool_regs *regs,
		     void *buf)
{
	static const unsigned int reg_ranges[] = {
		/* (start, end) T4 register address range pairs, elided here */
	};

	int i;
	struct adapter *ap = netdev2adap(dev);

	regs->version = mk_adap_vers(ap);

	memset(buf, 0, T4_REGMAP_SIZE);
	for (i = 0; i < ARRAY_SIZE(reg_ranges); i += 2)
		reg_block_dump(ap, buf, reg_ranges[i], reg_ranges[i + 1]);
}

static int restart_autoneg(struct net_device *dev)
{
	struct port_info *p = netdev_priv(dev);

	if (!netif_running(dev))
		return -EAGAIN;
	if (p->link_cfg.autoneg != AUTONEG_ENABLE)
		return -EINVAL;
	t4_restart_aneg(p->adapter, p->adapter->fn, p->tx_chan);
	return 0;
}

static int identify_port(struct net_device *dev, u32 data)
{
	struct adapter *adap = netdev2adap(dev);

	if (data == 0)
		data = 2;     /* default to 2 seconds */

	return t4_identify_port(adap, adap->fn, netdev2pinfo(dev)->viid,
				data * 5);
}

static unsigned int from_fw_linkcaps(unsigned int type, unsigned int caps)
{
	unsigned int v = 0;

	if (type == FW_PORT_TYPE_BT_SGMII || type == FW_PORT_TYPE_BT_XFI ||
	    type == FW_PORT_TYPE_BT_XAUI) {
		v |= SUPPORTED_TP;
		if (caps & FW_PORT_CAP_SPEED_100M)
			v |= SUPPORTED_100baseT_Full;
		if (caps & FW_PORT_CAP_SPEED_1G)
			v |= SUPPORTED_1000baseT_Full;
		if (caps & FW_PORT_CAP_SPEED_10G)
			v |= SUPPORTED_10000baseT_Full;
	} else if (type == FW_PORT_TYPE_KX4 || type == FW_PORT_TYPE_KX) {
		v |= SUPPORTED_Backplane;
		if (caps & FW_PORT_CAP_SPEED_1G)
			v |= SUPPORTED_1000baseKX_Full;
		if (caps & FW_PORT_CAP_SPEED_10G)
			v |= SUPPORTED_10000baseKX4_Full;
	} else if (type == FW_PORT_TYPE_KR)
		v |= SUPPORTED_Backplane | SUPPORTED_10000baseKR_Full;
	else if (type == FW_PORT_TYPE_BP_AP)
		v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC;
	else if (type == FW_PORT_TYPE_FIBER_XFI ||
		 type == FW_PORT_TYPE_FIBER_XAUI || type == FW_PORT_TYPE_SFP)
		v |= SUPPORTED_FIBRE;

	if (caps & FW_PORT_CAP_ANEG)
		v |= SUPPORTED_Autoneg;
	return v;
}

static unsigned int to_fw_linkcaps(unsigned int caps)
{
	unsigned int v = 0;

	if (caps & ADVERTISED_100baseT_Full)
		v |= FW_PORT_CAP_SPEED_100M;
	if (caps & ADVERTISED_1000baseT_Full)
		v |= FW_PORT_CAP_SPEED_1G;
	if (caps & ADVERTISED_10000baseT_Full)
		v |= FW_PORT_CAP_SPEED_10G;
	return v;
}

static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	const struct port_info *p = netdev_priv(dev);

	if (p->port_type == FW_PORT_TYPE_BT_SGMII ||
	    p->port_type == FW_PORT_TYPE_BT_XFI ||
	    p->port_type == FW_PORT_TYPE_BT_XAUI)
		cmd->port = PORT_TP;
	else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
		 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
		cmd->port = PORT_FIBRE;
	else if (p->port_type == FW_PORT_TYPE_SFP) {
		if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
		    p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
			cmd->port = PORT_DA;
		else
			cmd->port = PORT_FIBRE;
	} else
		cmd->port = PORT_OTHER;

	if (p->mdio_addr >= 0) {
		cmd->phy_address = p->mdio_addr;
		cmd->transceiver = XCVR_EXTERNAL;
		cmd->mdio_support = p->port_type == FW_PORT_TYPE_BT_SGMII ?
			MDIO_SUPPORTS_C22 : MDIO_SUPPORTS_C45;
	} else {
		cmd->phy_address = 0;  /* not really, but no better option */
		cmd->transceiver = XCVR_INTERNAL;
		cmd->mdio_support = 0;
	}

	cmd->supported = from_fw_linkcaps(p->port_type, p->link_cfg.supported);
	cmd->advertising = from_fw_linkcaps(p->port_type,
					    p->link_cfg.advertising);
	cmd->speed = netif_carrier_ok(dev) ? p->link_cfg.speed : 0;
	cmd->duplex = DUPLEX_FULL;
	cmd->autoneg = p->link_cfg.autoneg;
	cmd->maxtxpkt = 0;
	cmd->maxrxpkt = 0;
	return 0;
}

static unsigned int speed_to_caps(int speed)
{
	if (speed == SPEED_100)
		return FW_PORT_CAP_SPEED_100M;
	if (speed == SPEED_1000)
		return FW_PORT_CAP_SPEED_1G;
	if (speed == SPEED_10000)
		return FW_PORT_CAP_SPEED_10G;
	return 0;
}

static int set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	unsigned int cap;
	struct port_info *p = netdev_priv(dev);
	struct link_config *lc = &p->link_cfg;

	if (cmd->duplex != DUPLEX_FULL)     /* only full-duplex supported */
		return -EINVAL;

	if (!(lc->supported & FW_PORT_CAP_ANEG)) {
		/*
		 * PHY offers a single speed.  See if that's what's
		 * being requested.
		 */
		if (cmd->autoneg == AUTONEG_DISABLE &&
		    (lc->supported & speed_to_caps(cmd->speed)))
			return 0;
		return -EINVAL;
	}

	if (cmd->autoneg == AUTONEG_DISABLE) {
		cap = speed_to_caps(cmd->speed);

		if (!(lc->supported & cap) || cmd->speed == SPEED_1000 ||
		    cmd->speed == SPEED_10000)
			return -EINVAL;
		lc->requested_speed = cap;
		lc->advertising = 0;
	} else {
		cap = to_fw_linkcaps(cmd->advertising);
		if (!(lc->supported & cap))
			return -EINVAL;
		lc->requested_speed = 0;
		lc->advertising = cap | FW_PORT_CAP_ANEG;
	}
	lc->autoneg = cmd->autoneg;

	if (netif_running(dev))
		return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
				     lc);
	return 0;
}

static void get_pauseparam(struct net_device *dev,
			   struct ethtool_pauseparam *epause)
{
	struct port_info *p = netdev_priv(dev);

	epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
	epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0;
	epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0;
}

static int set_pauseparam(struct net_device *dev,
			  struct ethtool_pauseparam *epause)
{
	struct port_info *p = netdev_priv(dev);
	struct link_config *lc = &p->link_cfg;

	if (epause->autoneg == AUTONEG_DISABLE)
		lc->requested_fc = 0;
	else if (lc->supported & FW_PORT_CAP_ANEG)
		lc->requested_fc = PAUSE_AUTONEG;
	else
		return -EINVAL;

	if (epause->rx_pause)
		lc->requested_fc |= PAUSE_RX;
	if (epause->tx_pause)
		lc->requested_fc |= PAUSE_TX;
	if (netif_running(dev))
		return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
				     lc);
	return 0;
}

static u32 get_rx_csum(struct net_device *dev)
{
	struct port_info *p = netdev_priv(dev);

	return p->rx_offload & RX_CSO;
}

static int set_rx_csum(struct net_device *dev, u32 data)
{
	struct port_info *p = netdev_priv(dev);

	if (data)
		p->rx_offload |= RX_CSO;
	else
		p->rx_offload &= ~RX_CSO;
	return 0;
}

static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
{
	const struct port_info *pi = netdev_priv(dev);
	const struct sge *s = &pi->adapter->sge;

	e->rx_max_pending = MAX_RX_BUFFERS;
	e->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
	e->rx_jumbo_max_pending = 0;
	e->tx_max_pending = MAX_TXQ_ENTRIES;

	e->rx_pending = s->ethrxq[pi->first_qset].fl.size - 8;
	e->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
	e->rx_jumbo_pending = 0;
	e->tx_pending = s->ethtxq[pi->first_qset].q.size;
}

static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
{
	int i;
	const struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;
	struct sge *s = &adapter->sge;

	if (e->rx_pending > MAX_RX_BUFFERS || e->rx_jumbo_pending ||
	    e->tx_pending > MAX_TXQ_ENTRIES ||
	    e->rx_mini_pending > MAX_RSPQ_ENTRIES ||
	    e->rx_mini_pending < MIN_RSPQ_ENTRIES ||
	    e->rx_pending < MIN_FL_ENTRIES || e->tx_pending < MIN_TXQ_ENTRIES)
		return -EINVAL;

	if (adapter->flags & FULL_INIT_DONE)
		return -EBUSY;

	for (i = 0; i < pi->nqsets; ++i) {
		s->ethtxq[pi->first_qset + i].q.size = e->tx_pending;
		s->ethrxq[pi->first_qset + i].fl.size = e->rx_pending + 8;
		s->ethrxq[pi->first_qset + i].rspq.size = e->rx_mini_pending;
	}
	return 0;
}

static int closest_timer(const struct sge *s, int time)
{
	int i, delta, match = 0, min_delta = INT_MAX;

	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
		delta = time - s->timer_val[i];
		if (delta < 0)
			delta = -delta;
		if (delta < min_delta) {
			min_delta = delta;
			match = i;
		}
	}
	return match;
}

static int closest_thres(const struct sge *s, int thres)
{
	int i, delta, match = 0, min_delta = INT_MAX;

	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
		delta = thres - s->counter_val[i];
		if (delta < 0)
			delta = -delta;
		if (delta < min_delta) {
			min_delta = delta;
			match = i;
		}
	}
	return match;
}

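/*
 * Both helpers above do a simple nearest-match search.  For example, with
 * timer values of { 5, 10, 20, 50, 100, 200 } us, a requested hold-off of
 * 30 us maps to the 20 us entry, since |30 - 20| = 10 beats |30 - 50| = 20.
 */
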
/*
 * Return a queue's interrupt hold-off time in us.  0 means no timer.
 */
static unsigned int qtimer_val(const struct adapter *adap,
			       const struct sge_rspq *q)
{
	unsigned int idx = q->intr_params >> 1;

	return idx < SGE_NTIMERS ? adap->sge.timer_val[idx] : 0;
}

/**
 *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
 *	@adap: the adapter
 *	@q: the Rx queue
 *	@us: the hold-off time in us, or 0 to disable timer
 *	@cnt: the hold-off packet count, or 0 to disable counter
 *
 *	Sets an Rx queue's interrupt hold-off time and packet count.  At least
 *	one of the two needs to be enabled for the queue to generate interrupts.
 */
static int set_rxq_intr_params(struct adapter *adap, struct sge_rspq *q,
			       unsigned int us, unsigned int cnt)
{
	if ((us | cnt) == 0)
		cnt = 1;

	if (cnt) {
		int err;
		u32 v, new_idx;

		new_idx = closest_thres(&adap->sge, cnt);
		if (q->desc && q->pktcnt_idx != new_idx) {
			/* the queue has already been created, update it */
			v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
			    FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
			    FW_PARAMS_PARAM_YZ(q->cntxt_id);
			err = t4_set_params(adap, adap->fn, adap->fn, 0, 1, &v,
					    &new_idx);
			if (err)
				return err;
		}
		q->pktcnt_idx = new_idx;
	}

	us = us == 0 ? 6 : closest_timer(&adap->sge, us);
	q->intr_params = QINTR_TIMER_IDX(us) | (cnt > 0 ? QINTR_CNT_EN : 0);
	return 0;
}

static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
{
	const struct port_info *pi = netdev_priv(dev);
	struct adapter *adap = pi->adapter;

	return set_rxq_intr_params(adap, &adap->sge.ethrxq[pi->first_qset].rspq,
			c->rx_coalesce_usecs, c->rx_max_coalesced_frames);
}

static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
{
	const struct port_info *pi = netdev_priv(dev);
	const struct adapter *adap = pi->adapter;
	const struct sge_rspq *rq = &adap->sge.ethrxq[pi->first_qset].rspq;

	c->rx_coalesce_usecs = qtimer_val(adap, rq);
	c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN) ?
		adap->sge.counter_val[rq->pktcnt_idx] : 0;
	return 0;
}

/*
 * Translate a physical EEPROM address to virtual.  The first 1K is accessed
 * through virtual addresses starting at 31K, the rest is accessed through
 * virtual addresses starting at 0.  This mapping is correct only for PF0.
 */
static int eeprom_ptov(unsigned int phys_addr)
{
	if (phys_addr < 1024)
		return phys_addr + (31 << 10);
	if (phys_addr < EEPROMSIZE)
		return phys_addr - 1024;
	return -EINVAL;
}

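/*
 * For example, physical address 0x0 maps to virtual 0x7c00 (31K), while
 * physical 0x500 maps to virtual 0x100 (0x500 - 0x400).
 */
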
/*
 * The next two routines implement eeprom read/write from physical addresses.
 * The physical->virtual translation is correct only for PF0.
 */
static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v)
{
	int vaddr = eeprom_ptov(phys_addr);

	if (vaddr >= 0)
		vaddr = pci_read_vpd(adap->pdev, vaddr, sizeof(u32), v);
	return vaddr < 0 ? vaddr : 0;
}

static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v)
{
	int vaddr = eeprom_ptov(phys_addr);

	if (vaddr >= 0)
		vaddr = pci_write_vpd(adap->pdev, vaddr, sizeof(u32), &v);
	return vaddr < 0 ? vaddr : 0;
}

#define EEPROM_MAGIC 0x38E2F10C

static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e,
		      u8 *data)
{
	int i, err = 0;
	struct adapter *adapter = netdev2adap(dev);

	u8 *buf = kmalloc(EEPROMSIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	e->magic = EEPROM_MAGIC;
	for (i = e->offset & ~3; !err && i < e->offset + e->len; i += 4)
		err = eeprom_rd_phys(adapter, i, (u32 *)&buf[i]);

	if (!err)
		memcpy(data, buf + e->offset, e->len);
	kfree(buf);
	return err;
}

static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
		      u8 *data)
{
	u8 *buf;
	int err = 0;
	u32 aligned_offset, aligned_len, *p;
	struct adapter *adapter = netdev2adap(dev);

	if (eeprom->magic != EEPROM_MAGIC)
		return -EINVAL;

	aligned_offset = eeprom->offset & ~3;
	aligned_len = (eeprom->len + (eeprom->offset & 3) + 3) & ~3;

	if (aligned_offset != eeprom->offset || aligned_len != eeprom->len) {
		/*
		 * RMW possibly needed for first or last words.
		 */
		buf = kmalloc(aligned_len, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;
		err = eeprom_rd_phys(adapter, aligned_offset, (u32 *)buf);
		if (!err && aligned_len > 4)
			err = eeprom_rd_phys(adapter,
					     aligned_offset + aligned_len - 4,
					     (u32 *)&buf[aligned_len - 4]);
		if (err)
			goto out;
		memcpy(buf + (eeprom->offset & 3), data, eeprom->len);
	} else
		buf = data;

	err = t4_seeprom_wp(adapter, false);
	if (err)
		goto out;

	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
		err = eeprom_wr_phys(adapter, aligned_offset, *p);
		aligned_offset += 4;
	}

	if (!err)
		err = t4_seeprom_wp(adapter, true);
out:
	if (buf != data)
		kfree(buf);
	return err;
}

static int set_flash(struct net_device *netdev, struct ethtool_flash *ef)
{
	int ret;
	const struct firmware *fw;
	struct adapter *adap = netdev2adap(netdev);

	ef->data[sizeof(ef->data) - 1] = '\0';
	ret = request_firmware(&fw, ef->data, adap->pdev_dev);
	if (ret < 0)
		return ret;

	ret = t4_load_fw(adap, fw->data, fw->size);
	release_firmware(fw);
	if (!ret)
		dev_info(adap->pdev_dev, "loaded firmware %s\n", ef->data);
	return ret;
}

#define WOL_SUPPORTED (WAKE_BCAST | WAKE_MAGIC)
#define BCAST_CRC 0xa0ccc1a6

static void get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
	wol->supported = WAKE_BCAST | WAKE_MAGIC;
	wol->wolopts = netdev2adap(dev)->wol;
	memset(&wol->sopass, 0, sizeof(wol->sopass));
}

static int set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
	int err = 0;
	struct port_info *pi = netdev_priv(dev);

	if (wol->wolopts & ~WOL_SUPPORTED)
		return -EINVAL;
	t4_wol_magic_enable(pi->adapter, pi->tx_chan,
			    (wol->wolopts & WAKE_MAGIC) ? dev->dev_addr : NULL);
	if (wol->wolopts & WAKE_BCAST) {
		err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0xfe, ~0ULL,
					~0ULL, 0, false);
		if (!err)
			err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 1,
						~6ULL, ~0ULL, BCAST_CRC, true);
	} else
		t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0, 0, 0, 0, false);
	return err;
}

#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)

static int set_tso(struct net_device *dev, u32 value)
{
	if (value)
		dev->features |= TSO_FLAGS;
	else
		dev->features &= ~TSO_FLAGS;
	return 0;
}

static int set_flags(struct net_device *dev, u32 flags)
{
	return ethtool_op_set_flags(dev, flags, ETH_FLAG_RXHASH);
}

static int get_rss_table(struct net_device *dev, struct ethtool_rxfh_indir *p)
{
	const struct port_info *pi = netdev_priv(dev);
	unsigned int n = min_t(unsigned int, p->size, pi->rss_size);

	p->size = pi->rss_size;
	while (n--)
		p->ring_index[n] = pi->rss[n];
	return 0;
}

static int set_rss_table(struct net_device *dev,
			 const struct ethtool_rxfh_indir *p)
{
	unsigned int i;
	struct port_info *pi = netdev_priv(dev);

	if (p->size != pi->rss_size)
		return -EINVAL;
	for (i = 0; i < p->size; i++)
		if (p->ring_index[i] >= pi->nqsets)
			return -EINVAL;
	for (i = 0; i < p->size; i++)
		pi->rss[i] = p->ring_index[i];
	if (pi->adapter->flags & FULL_INIT_DONE)
		return write_rss(pi, pi->rss);
	return 0;
}

static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
		     void *rules)
{
	const struct port_info *pi = netdev_priv(dev);

	switch (info->cmd) {
	case ETHTOOL_GRXFH: {
		unsigned int v = pi->rss_mode;

		info->data = 0;
		switch (info->flow_type) {
		case TCP_V4_FLOW:
			if (v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
				info->data = RXH_IP_SRC | RXH_IP_DST |
					     RXH_L4_B_0_1 | RXH_L4_B_2_3;
			else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
				info->data = RXH_IP_SRC | RXH_IP_DST;
			break;
		case UDP_V4_FLOW:
			if ((v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) &&
			    (v & FW_RSS_VI_CONFIG_CMD_UDPEN))
				info->data = RXH_IP_SRC | RXH_IP_DST |
					     RXH_L4_B_0_1 | RXH_L4_B_2_3;
			else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
				info->data = RXH_IP_SRC | RXH_IP_DST;
			break;
		case SCTP_V4_FLOW:
		case AH_ESP_V4_FLOW:
		case IPV4_FLOW:
			if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
				info->data = RXH_IP_SRC | RXH_IP_DST;
			break;
		case TCP_V6_FLOW:
			if (v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
				info->data = RXH_IP_SRC | RXH_IP_DST |
					     RXH_L4_B_0_1 | RXH_L4_B_2_3;
			else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
				info->data = RXH_IP_SRC | RXH_IP_DST;
			break;
		case UDP_V6_FLOW:
			if ((v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) &&
			    (v & FW_RSS_VI_CONFIG_CMD_UDPEN))
				info->data = RXH_IP_SRC | RXH_IP_DST |
					     RXH_L4_B_0_1 | RXH_L4_B_2_3;
			else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
				info->data = RXH_IP_SRC | RXH_IP_DST;
			break;
		case SCTP_V6_FLOW:
		case AH_ESP_V6_FLOW:
		case IPV6_FLOW:
			if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
				info->data = RXH_IP_SRC | RXH_IP_DST;
			break;
		}
		return 0;
	}
	case ETHTOOL_GRXRINGS:
		info->data = pi->nqsets;
		return 0;
	}
	return -EOPNOTSUPP;
}

static struct ethtool_ops cxgb_ethtool_ops = {
	.get_settings      = get_settings,
	.set_settings      = set_settings,
	.get_drvinfo       = get_drvinfo,
	.get_msglevel      = get_msglevel,
	.set_msglevel      = set_msglevel,
	.get_ringparam     = get_sge_param,
	.set_ringparam     = set_sge_param,
	.get_coalesce      = get_coalesce,
	.set_coalesce      = set_coalesce,
	.get_eeprom_len    = get_eeprom_len,
	.get_eeprom        = get_eeprom,
	.set_eeprom        = set_eeprom,
	.get_pauseparam    = get_pauseparam,
	.set_pauseparam    = set_pauseparam,
	.get_rx_csum       = get_rx_csum,
	.set_rx_csum       = set_rx_csum,
	.set_tx_csum       = ethtool_op_set_tx_ipv6_csum,
	.set_sg            = ethtool_op_set_sg,
	.get_link          = ethtool_op_get_link,
	.get_strings       = get_strings,
	.phys_id           = identify_port,
	.nway_reset        = restart_autoneg,
	.get_sset_count    = get_sset_count,
	.get_ethtool_stats = get_stats,
	.get_regs_len      = get_regs_len,
	.get_regs          = get_regs,
	.get_wol           = get_wol,
	.set_wol           = set_wol,
	.set_tso           = set_tso,
	.set_flags         = set_flags,
	.get_rxnfc         = get_rxnfc,
	.get_rxfh_indir    = get_rss_table,
	.set_rxfh_indir    = set_rss_table,
	.flash_device      = set_flash,
};

/*
 * debugfs support
 */

static int mem_open(struct inode *inode, struct file *file)
{
	file->private_data = inode->i_private;
	return 0;
}

static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
			loff_t *ppos)
{
	loff_t pos = *ppos;
	loff_t avail = file->f_path.dentry->d_inode->i_size;
	unsigned int mem = (uintptr_t)file->private_data & 3;
	struct adapter *adap = file->private_data - mem;

	if (pos < 0)
		return -EINVAL;
	if (pos >= avail)
		return 0;
	if (count > avail - pos)
		count = avail - pos;

	while (count) {
		size_t len;
		int ret, ofst;
		__be32 data[16];

		if (mem == MEM_MC)
			ret = t4_mc_read(adap, pos, data, NULL);
		else
			ret = t4_edc_read(adap, mem, pos, data, NULL);
		if (ret)
			return ret;

		ofst = pos % sizeof(data);
		len = min(count, sizeof(data) - ofst);
		if (copy_to_user(buf, (u8 *)data + ofst, len))
			return -EFAULT;

		buf += len;
		pos += len;
		count -= len;
	}
	count = pos - *ppos;
	*ppos = pos;
	return count;
}

static const struct file_operations mem_debugfs_fops = {
	.owner = THIS_MODULE,
	.open  = mem_open,
	.read  = mem_read,
};

static void __devinit add_debugfs_mem(struct adapter *adap, const char *name,
				      unsigned int idx, unsigned int size_mb)
{
	struct dentry *de;

	de = debugfs_create_file(name, S_IRUSR, adap->debugfs_root,
				 (void *)adap + idx, &mem_debugfs_fops);
	if (de && de->d_inode)
		de->d_inode->i_size = size_mb << 20;
}

static int __devinit setup_debugfs(struct adapter *adap)
{
	int i;

	if (IS_ERR_OR_NULL(adap->debugfs_root))
		return -1;

	i = t4_read_reg(adap, MA_TARGET_MEM_ENABLE);
	if (i & EDRAM0_ENABLE)
		add_debugfs_mem(adap, "edc0", MEM_EDC0, 5);
	if (i & EDRAM1_ENABLE)
		add_debugfs_mem(adap, "edc1", MEM_EDC1, 5);
	if (i & EXT_MEM_ENABLE)
		add_debugfs_mem(adap, "mc", MEM_MC,
			EXT_MEM_SIZE_GET(t4_read_reg(adap, MA_EXT_MEMORY_BAR)));
	if (adap->l2t)
		debugfs_create_file("l2t", S_IRUSR, adap->debugfs_root, adap,
				    &t4_l2t_fops);
	return 0;
}

/*
 * upper-layer driver support
 */

/*
 * Allocate an active-open TID and set it to the supplied value.
 */
int cxgb4_alloc_atid(struct tid_info *t, void *data)
{
	int atid = -1;

	spin_lock_bh(&t->atid_lock);
	if (t->afree) {
		union aopen_entry *p = t->afree;

		atid = p - t->atid_tab;
		t->afree = p->next;
		p->data = data;
		t->atids_in_use++;
	}
	spin_unlock_bh(&t->atid_lock);
	return atid;
}
EXPORT_SYMBOL(cxgb4_alloc_atid);

/*
 * Release an active-open TID.
 */
void cxgb4_free_atid(struct tid_info *t, unsigned int atid)
{
	union aopen_entry *p = &t->atid_tab[atid];

	spin_lock_bh(&t->atid_lock);
	p->next = t->afree;
	t->afree = p;
	t->atids_in_use--;
	spin_unlock_bh(&t->atid_lock);
}
EXPORT_SYMBOL(cxgb4_free_atid);

/*
 * Allocate a server TID and set it to the supplied value.
 */
int cxgb4_alloc_stid(struct tid_info *t, int family, void *data)
{
	int stid;

	spin_lock_bh(&t->stid_lock);
	if (family == PF_INET) {
		stid = find_first_zero_bit(t->stid_bmap, t->nstids);
		if (stid < t->nstids)
			__set_bit(stid, t->stid_bmap);
		else
			stid = -1;
	} else {
		stid = bitmap_find_free_region(t->stid_bmap, t->nstids, 2);
		if (stid < 0)
			stid = -1;
	}
	if (stid >= 0) {
		t->stid_tab[stid].data = data;
		stid += t->stid_base;
		t->stids_in_use++;
	}
	spin_unlock_bh(&t->stid_lock);
	return stid;
}
EXPORT_SYMBOL(cxgb4_alloc_stid);

/*
 * Release a server TID.
 */
void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family)
{
	stid -= t->stid_base;
	spin_lock_bh(&t->stid_lock);
	if (family == PF_INET)
		__clear_bit(stid, t->stid_bmap);
	else
		bitmap_release_region(t->stid_bmap, stid, 2);
	t->stid_tab[stid].data = NULL;
	t->stids_in_use--;
	spin_unlock_bh(&t->stid_lock);
}
EXPORT_SYMBOL(cxgb4_free_stid);

/*
 * Populate a TID_RELEASE WR.  Caller must properly size the skb.
 */
static void mk_tid_release(struct sk_buff *skb, unsigned int chan,
			   unsigned int tid)
{
	struct cpl_tid_release *req;

	set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
	req = (struct cpl_tid_release *)__skb_put(skb, sizeof(*req));
	INIT_TP_WR(req, tid);
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
}

/*
 * Queue a TID release request and if necessary schedule a work queue to
 * process it.
 */
void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan,
			     unsigned int tid)
{
	void **p = &t->tid_tab[tid];
	struct adapter *adap = container_of(t, struct adapter, tids);

	spin_lock_bh(&adap->tid_release_lock);
	*p = adap->tid_release_head;
	/* Low 2 bits encode the Tx channel number */
	adap->tid_release_head = (void **)((uintptr_t)p | chan);
	if (!adap->tid_release_task_busy) {
		adap->tid_release_task_busy = true;
		schedule_work(&adap->tid_release_task);
	}
	spin_unlock_bh(&adap->tid_release_lock);
}
EXPORT_SYMBOL(cxgb4_queue_tid_release);

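/*
 * The pending-release list above links TIDs through the tid_tab slots
 * themselves: each queued entry points at the previous head, and because
 * those pointers are at least 4-byte aligned, the low two bits are free to
 * carry the Tx channel number (0-3), which process_tid_release_list()
 * recovers with "(uintptr_t)p & 3".
 */
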
/*
 * Process the list of pending TID release requests.
 */
static void process_tid_release_list(struct work_struct *work)
{
	struct sk_buff *skb;
	struct adapter *adap;

	adap = container_of(work, struct adapter, tid_release_task);

	spin_lock_bh(&adap->tid_release_lock);
	while (adap->tid_release_head) {
		void **p = adap->tid_release_head;
		unsigned int chan = (uintptr_t)p & 3;
		p = (void *)p - chan;

		adap->tid_release_head = *p;
		*p = NULL;
		spin_unlock_bh(&adap->tid_release_lock);

		while (!(skb = alloc_skb(sizeof(struct cpl_tid_release),
					 GFP_KERNEL)))
			schedule_timeout_uninterruptible(1);

		mk_tid_release(skb, chan, p - adap->tids.tid_tab);
		t4_ofld_send(adap, skb);
		spin_lock_bh(&adap->tid_release_lock);
	}
	adap->tid_release_task_busy = false;
	spin_unlock_bh(&adap->tid_release_lock);
}

/*
 * Release a TID and inform HW.  If we are unable to allocate the release
 * message we defer to a work queue.
 */
void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid)
{
	void *old;
	struct sk_buff *skb;
	struct adapter *adap = container_of(t, struct adapter, tids);

	old = t->tid_tab[tid];
	skb = alloc_skb(sizeof(struct cpl_tid_release), GFP_ATOMIC);
	if (likely(skb)) {
		t->tid_tab[tid] = NULL;
		mk_tid_release(skb, chan, tid);
		t4_ofld_send(adap, skb);
	} else
		cxgb4_queue_tid_release(t, chan, tid);
	if (old)
		atomic_dec(&t->tids_in_use);
}
EXPORT_SYMBOL(cxgb4_remove_tid);

/*
 * Allocate and initialize the TID tables.  Returns 0 on success.
 */
static int tid_init(struct tid_info *t)
{
	size_t size;
	unsigned int natids = t->natids;

	size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) +
	       t->nstids * sizeof(*t->stid_tab) +
	       BITS_TO_LONGS(t->nstids) * sizeof(long);
	t->tid_tab = t4_alloc_mem(size);
	if (!t->tid_tab)
		return -ENOMEM;

	t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
	t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
	t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids];
	spin_lock_init(&t->stid_lock);
	spin_lock_init(&t->atid_lock);

	t->stids_in_use = 0;
	t->afree = NULL;
	t->atids_in_use = 0;
	atomic_set(&t->tids_in_use, 0);

	/* Setup the free list for atid_tab and clear the stid bitmap. */
	if (natids) {
		while (--natids)
			t->atid_tab[natids - 1].next = &t->atid_tab[natids];
		t->afree = t->atid_tab;
	}
	bitmap_zero(t->stid_bmap, t->nstids);
	return 0;
}

/**
 *	cxgb4_create_server - create an IP server
 *	@dev: the device
 *	@stid: the server TID
 *	@sip: local IP address to bind server to
 *	@sport: the server's TCP port
 *	@queue: queue to direct messages from this server to
 *
 *	Create an IP server for the given port and address.
 *	Returns <0 on error and one of the %NET_XMIT_* values on success.
 */
int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
			__be32 sip, __be16 sport, unsigned int queue)
{
	unsigned int chan;
	struct sk_buff *skb;
	struct adapter *adap;
	struct cpl_pass_open_req *req;

	skb = alloc_skb(sizeof(*req), GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	adap = netdev2adap(dev);
	req = (struct cpl_pass_open_req *)__skb_put(skb, sizeof(*req));
	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
	req->local_port = sport;
	req->peer_port = htons(0);
	req->local_ip = sip;
	req->peer_ip = htonl(0);
	chan = netdev2pinfo(adap->sge.ingr_map[queue]->netdev)->tx_chan;
	req->opt0 = cpu_to_be64(TX_CHAN(chan));
	req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
				SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
	return t4_mgmt_tx(adap, skb);
}
EXPORT_SYMBOL(cxgb4_create_server);

/**
 *	cxgb4_create_server6 - create an IPv6 server
 *	@dev: the device
 *	@stid: the server TID
 *	@sip: local IPv6 address to bind server to
 *	@sport: the server's TCP port
 *	@queue: queue to direct messages from this server to
 *
 *	Create an IPv6 server for the given port and address.
 *	Returns <0 on error and one of the %NET_XMIT_* values on success.
 */
int cxgb4_create_server6(const struct net_device *dev, unsigned int stid,
			 const struct in6_addr *sip, __be16 sport,
			 unsigned int queue)
{
	unsigned int chan;
	struct sk_buff *skb;
	struct adapter *adap;
	struct cpl_pass_open_req6 *req;

	skb = alloc_skb(sizeof(*req), GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	adap = netdev2adap(dev);
	req = (struct cpl_pass_open_req6 *)__skb_put(skb, sizeof(*req));
	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, stid));
	req->local_port = sport;
	req->peer_port = htons(0);
	req->local_ip_hi = *(__be64 *)(sip->s6_addr);
	req->local_ip_lo = *(__be64 *)(sip->s6_addr + 8);
	req->peer_ip_hi = cpu_to_be64(0);
	req->peer_ip_lo = cpu_to_be64(0);
	chan = netdev2pinfo(adap->sge.ingr_map[queue]->netdev)->tx_chan;
	req->opt0 = cpu_to_be64(TX_CHAN(chan));
	req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
				SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
	return t4_mgmt_tx(adap, skb);
}
EXPORT_SYMBOL(cxgb4_create_server6);

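/*
 * Typical ULD usage of the two routines above (a sketch; error handling and
 * the surrounding setup are elided): reserve a server TID, then ask the
 * hardware to listen on it:
 *
 *	stid = cxgb4_alloc_stid(&adap->tids, PF_INET, data);
 *	if (stid >= 0)
 *		ret = cxgb4_create_server(dev, stid, sip, sport, rxq_idx);
 */
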
/**
 *	cxgb4_best_mtu - find the entry in the MTU table closest to an MTU
 *	@mtus: the HW MTU table
 *	@mtu: the target MTU
 *	@idx: index of selected entry in the MTU table
 *
 *	Returns the index and the value in the HW MTU table that is closest to
 *	but does not exceed @mtu, unless @mtu is smaller than any value in the
 *	table, in which case that smallest available value is selected.
 */
unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
			    unsigned int *idx)
{
	unsigned int i = 0;

	while (i < NMTUS - 1 && mtus[i + 1] <= mtu)
		++i;
	if (idx)
		*idx = i;
	return mtus[i];
}
EXPORT_SYMBOL(cxgb4_best_mtu);

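/*
 * For example, if the MTU table were { ..., 1488, 1500, 2002, ... }, a
 * target MTU of 1600 would select 1500: the largest entry that does not
 * exceed the target.
 */
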
/**
 *	cxgb4_port_chan - get the HW channel of a port
 *	@dev: the net device for the port
 *
 *	Return the HW Tx channel of the given port.
 */
unsigned int cxgb4_port_chan(const struct net_device *dev)
{
	return netdev2pinfo(dev)->tx_chan;
}
EXPORT_SYMBOL(cxgb4_port_chan);

/**
 *	cxgb4_port_viid - get the VI id of a port
 *	@dev: the net device for the port
 *
 *	Return the VI id of the given port.
 */
unsigned int cxgb4_port_viid(const struct net_device *dev)
{
	return netdev2pinfo(dev)->viid;
}
EXPORT_SYMBOL(cxgb4_port_viid);

/**
 *	cxgb4_port_idx - get the index of a port
 *	@dev: the net device for the port
 *
 *	Return the index of the given port.
 */
unsigned int cxgb4_port_idx(const struct net_device *dev)
{
	return netdev2pinfo(dev)->port_id;
}
EXPORT_SYMBOL(cxgb4_port_idx);

/**
 *	cxgb4_netdev_by_hwid - return the net device of a HW port
 *	@pdev: identifies the adapter
 *	@id: the HW port id
 *
 *	Return the net device associated with the interface with the given HW
 *	id.
 */
struct net_device *cxgb4_netdev_by_hwid(struct pci_dev *pdev, unsigned int id)
{
	const struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap || id >= NCHAN)
		return NULL;
	id = adap->chan_map[id];
	return id < MAX_NPORTS ? adap->port[id] : NULL;
}
EXPORT_SYMBOL(cxgb4_netdev_by_hwid);

void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
			 struct tp_tcp_stats *v6)
{
	struct adapter *adap = pci_get_drvdata(pdev);

	spin_lock(&adap->stats_lock);
	t4_tp_get_tcp_stats(adap, v4, v6);
	spin_unlock(&adap->stats_lock);
}
EXPORT_SYMBOL(cxgb4_get_tcp_stats);

void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
		      const unsigned int *pgsz_order)
{
	struct adapter *adap = netdev2adap(dev);

	t4_write_reg(adap, ULP_RX_ISCSI_TAGMASK, tag_mask);
	t4_write_reg(adap, ULP_RX_ISCSI_PSZ, HPZ0(pgsz_order[0]) |
		     HPZ1(pgsz_order[1]) | HPZ2(pgsz_order[2]) |
		     HPZ3(pgsz_order[3]));
}
EXPORT_SYMBOL(cxgb4_iscsi_init);

static struct pci_driver cxgb4_driver;

static void check_neigh_update(struct neighbour *neigh)
{
	const struct device *parent;
	const struct net_device *netdev = neigh->dev;

	if (netdev->priv_flags & IFF_802_1Q_VLAN)
		netdev = vlan_dev_real_dev(netdev);
	parent = netdev->dev.parent;
	if (parent && parent->driver == &cxgb4_driver.driver)
		t4_l2t_update(dev_get_drvdata(parent), neigh);
}

static int netevent_cb(struct notifier_block *nb, unsigned long event,
		       void *data)
{
	switch (event) {
	case NETEVENT_NEIGH_UPDATE:
		check_neigh_update(data);
		break;
	case NETEVENT_PMTU_UPDATE:
	case NETEVENT_REDIRECT:
	default:
		break;
	}
	return 0;
}

static bool netevent_registered;
static struct notifier_block cxgb4_netevent_nb = {
	.notifier_call = netevent_cb
};

static void uld_attach(struct adapter *adap, unsigned int uld)
{
	void *handle;
	struct cxgb4_lld_info lli;

	lli.pdev = adap->pdev;
	lli.l2t = adap->l2t;
	lli.tids = &adap->tids;
	lli.ports = adap->port;
	lli.vr = &adap->vres;
	lli.mtus = adap->params.mtus;
	if (uld == CXGB4_ULD_RDMA) {
		lli.rxq_ids = adap->sge.rdma_rxq;
		lli.nrxq = adap->sge.rdmaqs;
	} else if (uld == CXGB4_ULD_ISCSI) {
		lli.rxq_ids = adap->sge.ofld_rxq;
		lli.nrxq = adap->sge.ofldqsets;
	}
	lli.ntxq = adap->sge.ofldqsets;
	lli.nchan = adap->params.nports;
	lli.nports = adap->params.nports;
	lli.wr_cred = adap->params.ofldq_wr_cred;
	lli.adapter_type = adap->params.rev;
	lli.iscsi_iolen = MAXRXDATA_GET(t4_read_reg(adap, TP_PARA_REG2));
	lli.udb_density = 1 << QUEUESPERPAGEPF0_GET(
			t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF) >>
			(adap->fn * 4));
	lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET(
			t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >>
			(adap->fn * 4));
	lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS);
	lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL);
	lli.fw_vers = adap->params.fw_vers;

	handle = ulds[uld].add(&lli);
	if (IS_ERR(handle)) {
		dev_warn(adap->pdev_dev,
			 "could not attach to the %s driver, error %ld\n",
			 uld_str[uld], PTR_ERR(handle));
		return;
	}

	adap->uld_handle[uld] = handle;

	if (!netevent_registered) {
		register_netevent_notifier(&cxgb4_netevent_nb);
		netevent_registered = true;
	}

	if (adap->flags & FULL_INIT_DONE)
		ulds[uld].state_change(handle, CXGB4_STATE_UP);
}

static void attach_ulds(struct adapter *adap)
{
	unsigned int i;

	mutex_lock(&uld_mutex);
	list_add_tail(&adap->list_node, &adapter_list);
	for (i = 0; i < CXGB4_ULD_MAX; i++)
		if (ulds[i].add)
			uld_attach(adap, i);
	mutex_unlock(&uld_mutex);
}

static void detach_ulds(struct adapter *adap)
{
	unsigned int i;

	mutex_lock(&uld_mutex);
	list_del(&adap->list_node);
	for (i = 0; i < CXGB4_ULD_MAX; i++)
		if (adap->uld_handle[i]) {
			ulds[i].state_change(adap->uld_handle[i],
					     CXGB4_STATE_DETACH);
			adap->uld_handle[i] = NULL;
		}
	if (netevent_registered && list_empty(&adapter_list)) {
		unregister_netevent_notifier(&cxgb4_netevent_nb);
		netevent_registered = false;
	}
	mutex_unlock(&uld_mutex);
}

static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
{
	unsigned int i;

	mutex_lock(&uld_mutex);
	for (i = 0; i < CXGB4_ULD_MAX; i++)
		if (adap->uld_handle[i])
			ulds[i].state_change(adap->uld_handle[i], new_state);
	mutex_unlock(&uld_mutex);
}

/**
 *	cxgb4_register_uld - register an upper-layer driver
 *	@type: the ULD type
 *	@p: the ULD methods
 *
 *	Registers an upper-layer driver with this driver and notifies the ULD
 *	about any presently available devices that support its type.  Returns
 *	%-EBUSY if a ULD of the same type is already registered.
 */
int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
{
	int ret = 0;
	struct adapter *adap;

	if (type >= CXGB4_ULD_MAX)
		return -EINVAL;
	mutex_lock(&uld_mutex);
	if (ulds[type].add) {
		ret = -EBUSY;
		goto out;
	}
	ulds[type] = *p;
	list_for_each_entry(adap, &adapter_list, list_node)
		uld_attach(adap, type);
out:	mutex_unlock(&uld_mutex);
	return ret;
}
EXPORT_SYMBOL(cxgb4_register_uld);
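/*
 * Illustrative sketch only (hypothetical names): a ULD module wires itself
 * up to cxgb4 roughly as follows, supplying the callbacks declared in
 * cxgb4_uld.h and registering from its module init:
 *
 *	static const struct cxgb4_uld_info my_uld_info = {
 *		.add          = my_uld_add,
 *		.rx_handler   = my_uld_rx_handler,
 *		.state_change = my_uld_state_change,
 *	};
 *
 *	static int __init my_uld_init(void)
 *	{
 *		return cxgb4_register_uld(CXGB4_ULD_ISCSI, &my_uld_info);
 *	}
 *
 * cxgb4_register_uld() then calls my_uld_add() once for each adapter
 * already on adapter_list, via uld_attach() above.
 */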
/**
 *	cxgb4_unregister_uld - unregister an upper-layer driver
 *	@type: the ULD type
 *
 *	Unregisters an existing upper-layer driver.
 */
int cxgb4_unregister_uld(enum cxgb4_uld type)
{
	struct adapter *adap;

	if (type >= CXGB4_ULD_MAX)
		return -EINVAL;
	mutex_lock(&uld_mutex);
	list_for_each_entry(adap, &adapter_list, list_node)
		adap->uld_handle[type] = NULL;
	ulds[type].add = NULL;
	mutex_unlock(&uld_mutex);
	return 0;
}
EXPORT_SYMBOL(cxgb4_unregister_uld);
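/*
 * The matching teardown for the sketch above (again, hypothetical names):
 * a ULD unregisters on module exit so no adapter keeps a handle to it:
 *
 *	static void __exit my_uld_exit(void)
 *	{
 *		cxgb4_unregister_uld(CXGB4_ULD_ISCSI);
 *	}
 */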
/**
 *	cxgb_up - enable the adapter
 *	@adap: adapter being enabled
 *
 *	Called when the first port is enabled, this function performs the
 *	actions necessary to make an adapter operational, such as completing
 *	the initialization of HW modules, and enabling interrupts.
 *
 *	Must be called with the rtnl lock held.
 */
static int cxgb_up(struct adapter *adap)
{
	int err;

	err = setup_sge_queues(adap);
	if (err)
		goto out;
	err = setup_rss(adap);
	if (err)
		goto freeq;

	if (adap->flags & USING_MSIX) {
		name_msix_vecs(adap);
		err = request_irq(adap->msix_info[0].vec, t4_nondata_intr, 0,
				  adap->msix_info[0].desc, adap);
		if (err)
			goto irq_err;

		err = request_msix_queue_irqs(adap);
		if (err) {
			free_irq(adap->msix_info[0].vec, adap);
			goto irq_err;
		}
	} else {
		err = request_irq(adap->pdev->irq, t4_intr_handler(adap),
				  (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
				  adap->name, adap);
		if (err)
			goto irq_err;
	}

	t4_intr_enable(adap);
	adap->flags |= FULL_INIT_DONE;
	notify_ulds(adap, CXGB4_STATE_UP);
 out:
	return err;
 irq_err:
	dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
 freeq:
	t4_free_sge_resources(adap);
	goto out;
}
static void cxgb_down(struct adapter *adapter)
{
	t4_intr_disable(adapter);
	cancel_work_sync(&adapter->tid_release_task);
	adapter->tid_release_task_busy = false;
	adapter->tid_release_head = NULL;

	if (adapter->flags & USING_MSIX) {
		free_msix_queue_irqs(adapter);
		free_irq(adapter->msix_info[0].vec, adapter);
	} else
		free_irq(adapter->pdev->irq, adapter);
	quiesce_rx(adapter);
	t4_sge_stop(adapter);
	t4_free_sge_resources(adapter);
	adapter->flags &= ~FULL_INIT_DONE;
}
/*
 * net_device operations
 */
static int cxgb_open(struct net_device *dev)
{
	int err;
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	if (!(adapter->flags & FULL_INIT_DONE)) {
		err = cxgb_up(adapter);
		if (err < 0)
			return err;
	}

	dev->real_num_tx_queues = pi->nqsets;
	err = link_start(dev);
	if (!err)
		netif_tx_start_all_queues(dev);
	return err;
}
static int cxgb_close(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	netif_tx_stop_all_queues(dev);
	netif_carrier_off(dev);
	return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false);
}
static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev,
						struct rtnl_link_stats64 *ns)
{
	struct port_stats stats;
	struct port_info *p = netdev_priv(dev);
	struct adapter *adapter = p->adapter;

	spin_lock(&adapter->stats_lock);
	t4_get_port_stats(adapter, p->tx_chan, &stats);
	spin_unlock(&adapter->stats_lock);

	ns->tx_bytes   = stats.tx_octets;
	ns->tx_packets = stats.tx_frames;
	ns->rx_bytes   = stats.rx_octets;
	ns->rx_packets = stats.rx_frames;
	ns->multicast  = stats.rx_mcast_frames;

	/* detailed rx_errors */
	ns->rx_length_errors = stats.rx_jabber + stats.rx_too_long +
			       stats.rx_runt;
	ns->rx_over_errors   = 0;
	ns->rx_crc_errors    = stats.rx_fcs_err;
	ns->rx_frame_errors  = stats.rx_symbol_err;
	ns->rx_fifo_errors   = stats.rx_ovflow0 + stats.rx_ovflow1 +
			       stats.rx_ovflow2 + stats.rx_ovflow3 +
			       stats.rx_trunc0 + stats.rx_trunc1 +
			       stats.rx_trunc2 + stats.rx_trunc3;
	ns->rx_missed_errors = 0;

	/* detailed tx_errors */
	ns->tx_aborted_errors   = 0;
	ns->tx_carrier_errors   = 0;
	ns->tx_fifo_errors      = 0;
	ns->tx_heartbeat_errors = 0;
	ns->tx_window_errors    = 0;

	ns->tx_errors = stats.tx_error_frames;
	ns->rx_errors = stats.rx_symbol_err + stats.rx_fcs_err +
		ns->rx_length_errors + stats.rx_len_err + ns->rx_fifo_errors;
	return ns;
}
static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
{
	unsigned int mbox;
	int ret = 0, prtad, devad;
	struct port_info *pi = netdev_priv(dev);
	struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data;

	switch (cmd) {
	case SIOCGMIIPHY:
		if (pi->mdio_addr < 0)
			return -EOPNOTSUPP;
		data->phy_id = pi->mdio_addr;
		break;
	case SIOCGMIIREG:
	case SIOCSMIIREG:
		if (mdio_phy_id_is_c45(data->phy_id)) {
			prtad = mdio_phy_id_prtad(data->phy_id);
			devad = mdio_phy_id_devad(data->phy_id);
		} else if (data->phy_id < 32) {
			prtad = data->phy_id;
			devad = 0;
			data->reg_num &= 0x1f;
		} else
			return -EINVAL;

		mbox = pi->adapter->fn;
		if (cmd == SIOCGMIIREG)
			ret = t4_mdio_rd(pi->adapter, mbox, prtad, devad,
					 data->reg_num, &data->val_out);
		else
			ret = t4_mdio_wr(pi->adapter, mbox, prtad, devad,
					 data->reg_num, data->val_in);
		break;
	default:
		return -EOPNOTSUPP;
	}
	return ret;
}
static void cxgb_set_rxmode(struct net_device *dev)
{
	/* unfortunately we can't return errors to the stack */
	set_rxmode(dev, -1, false);
}
static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);

	if (new_mtu < 81 || new_mtu > MAX_MTU)		/* accommodate SACK */
		return -EINVAL;
	ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, new_mtu, -1,
			    -1, -1, -1, true);
	if (!ret)
		dev->mtu = new_mtu;
	return ret;
}
static int cxgb_set_mac_addr(struct net_device *dev, void *p)
{
	int ret;
	struct sockaddr *addr = p;
	struct port_info *pi = netdev_priv(dev);

	if (!is_valid_ether_addr(addr->sa_data))
		return -EINVAL;

	ret = t4_change_mac(pi->adapter, pi->adapter->fn, pi->viid,
			    pi->xact_addr_filt, addr->sa_data, true, true);
	if (ret < 0)
		return ret;

	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
	pi->xact_addr_filt = ret;
	return 0;
}
static void vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
{
	struct port_info *pi = netdev_priv(dev);

	pi->vlan_grp = grp;
	t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, -1, -1, -1, -1,
		      grp != NULL, true);
}
#ifdef CONFIG_NET_POLL_CONTROLLER
static void cxgb_netpoll(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adap = pi->adapter;

	if (adap->flags & USING_MSIX) {
		int i;
		struct sge_eth_rxq *rx = &adap->sge.ethrxq[pi->first_qset];

		for (i = pi->nqsets; i; i--, rx++)
			t4_sge_intr_msix(0, &rx->rspq);
	} else
		t4_intr_handler(adap)(0, adap);
}
#endif
static const struct net_device_ops cxgb4_netdev_ops = {
	.ndo_open             = cxgb_open,
	.ndo_stop             = cxgb_close,
	.ndo_start_xmit       = t4_eth_xmit,
	.ndo_get_stats64      = cxgb_get_stats,
	.ndo_set_rx_mode      = cxgb_set_rxmode,
	.ndo_set_mac_address  = cxgb_set_mac_addr,
	.ndo_validate_addr    = eth_validate_addr,
	.ndo_do_ioctl         = cxgb_ioctl,
	.ndo_change_mtu       = cxgb_change_mtu,
	.ndo_vlan_rx_register = vlan_rx_register,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller  = cxgb_netpoll,
#endif
};
void t4_fatal_err(struct adapter *adap)
{
	t4_set_reg_field(adap, SGE_CONTROL, GLOBALENABLE, 0);
	t4_intr_disable(adap);
	dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
}
static void setup_memwin(struct adapter *adap)
{
	u32 bar0;

	bar0 = pci_resource_start(adap->pdev, 0);  /* truncation intentional */
	t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 0),
		     (bar0 + MEMWIN0_BASE) | BIR(0) |
		     WINDOW(ilog2(MEMWIN0_APERTURE) - 10));
	t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 1),
		     (bar0 + MEMWIN1_BASE) | BIR(0) |
		     WINDOW(ilog2(MEMWIN1_APERTURE) - 10));
	t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2),
		     (bar0 + MEMWIN2_BASE) | BIR(0) |
		     WINDOW(ilog2(MEMWIN2_APERTURE) - 10));
	if (adap->vres.ocq.size) {
		unsigned int start, sz_kb;

		start = pci_resource_start(adap->pdev, 2) +
			OCQ_WIN_OFFSET(adap->pdev, &adap->vres);
		sz_kb = roundup_pow_of_two(adap->vres.ocq.size) >> 10;
		t4_write_reg(adap,
			     PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 3),
			     start | BIR(1) | WINDOW(ilog2(sz_kb)));
		t4_write_reg(adap,
			     PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3),
			     adap->vres.ocq.start);
		t4_read_reg(adap,
			    PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3));
	}
}
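/*
 * Note on the window encoding used above: the WINDOW field of
 * PCIE_MEM_ACCESS_BASE_WIN holds log2 of the aperture size in KB, so
 * ilog2(aperture) - 10 converts a byte count to that encoding (e.g. a
 * 64 KB aperture yields ilog2(65536) - 10 = 16 - 10 = 6), while sz_kb is
 * already in KB and is encoded as ilog2(sz_kb) directly.
 */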
static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
{
	u32 v;
	int ret;

	/* get device capabilities */
	memset(c, 0, sizeof(*c));
	c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
			       FW_CMD_REQUEST | FW_CMD_READ);
	c->retval_len16 = htonl(FW_LEN16(*c));
	ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), c);
	if (ret < 0)
		return ret;

	/* select capabilities we'll be using */
	if (c->niccaps & htons(FW_CAPS_CONFIG_NIC_VM)) {
		if (!vf_acls)
			c->niccaps ^= htons(FW_CAPS_CONFIG_NIC_VM);
		else
			c->niccaps = htons(FW_CAPS_CONFIG_NIC_VM);
	} else if (vf_acls) {
		dev_err(adap->pdev_dev, "virtualization ACLs not supported");
		return ret;
	}

	c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
			       FW_CMD_REQUEST | FW_CMD_WRITE);
	ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), NULL);
	if (ret < 0)
		return ret;

	ret = t4_config_glbl_rss(adap, adap->fn,
				 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
				 FW_RSS_GLB_CONFIG_CMD_TNLMAPEN |
				 FW_RSS_GLB_CONFIG_CMD_TNLALLLKP);
	if (ret < 0)
		return ret;

	ret = t4_cfg_pfvf(adap, adap->fn, adap->fn, 0, MAX_EGRQ, 64, MAX_INGQ,
			  0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF, FW_CMD_CAP_PF);
	if (ret < 0)
		return ret;

	t4_sge_init(adap);

	/* tweak some settings */
	t4_write_reg(adap, TP_SHIFT_CNT, 0x64f8849);
	t4_write_reg(adap, ULP_RX_TDDP_PSZ, HPZ0(PAGE_SHIFT - 12));
	t4_write_reg(adap, TP_PIO_ADDR, TP_INGRESS_CONFIG);
	v = t4_read_reg(adap, TP_PIO_DATA);
	t4_write_reg(adap, TP_PIO_DATA, v & ~CSUM_HAS_PSEUDO_HDR);

	/* get basic stuff going */
	return t4_early_init(adap, adap->fn);
}
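/*
 * adap_init1() follows the usual firmware mailbox pattern for capability
 * negotiation: issue FW_CAPS_CONFIG_CMD with FW_CMD_READ to learn what the
 * firmware offers, adjust the returned bits (the NIC_VM toggle above), then
 * send the same structure back with FW_CMD_WRITE to commit the selection.
 * The caller retains the final structure so adap_init0() can key off
 * c.ofldcaps, c.rdmacaps, and c.iscsicaps when sizing resources.
 */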
/*
 * Max # of ATIDs.  The absolute HW max is 16K but we keep it lower.
 */
#define MAX_ATIDS 8192U
/*
 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
 */
static int adap_init0(struct adapter *adap)
{
	int ret;
	u32 v, port_vec;
	enum dev_state state;
	u32 params[7], val[7];
	struct fw_caps_config_cmd c;

	ret = t4_check_fw_version(adap);
	if (ret == -EINVAL || ret > 0) {
		if (upgrade_fw(adap) >= 0)	/* recache FW version */
			ret = t4_check_fw_version(adap);
	}
	if (ret < 0)
		return ret;

	/* contact FW, request master */
	ret = t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, &state);
	if (ret < 0) {
		dev_err(adap->pdev_dev, "could not connect to FW, error %d\n",
			ret);
		return ret;
	}

	/* reset device */
	ret = t4_fw_reset(adap, adap->fn, PIORSTMODE | PIORST);
	if (ret < 0)
		goto bye;
	for (v = 0; v < SGE_NTIMERS - 1; v++)
		adap->sge.timer_val[v] = min(intr_holdoff[v], MAX_SGE_TIMERVAL);
	adap->sge.timer_val[SGE_NTIMERS - 1] = MAX_SGE_TIMERVAL;
	adap->sge.counter_val[0] = 1;
	for (v = 1; v < SGE_NCOUNTERS; v++)
		adap->sge.counter_val[v] = min(intr_cnt[v - 1],
					       THRESHOLD_3_MASK);
#define FW_PARAM_DEV(param) \
	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
	 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))

	params[0] = FW_PARAM_DEV(CCLK);
	ret = t4_query_params(adap, adap->fn, adap->fn, 0, 1, params, val);
	if (ret < 0)
		goto bye;
	adap->params.vpd.cclk = val[0];

	ret = adap_init1(adap, &c);
	if (ret < 0)
		goto bye;

#define FW_PARAM_PFVF(param) \
	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
	 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param) | \
	 FW_PARAMS_PARAM_Y(adap->fn))

	params[0] = FW_PARAM_DEV(PORTVEC);
	params[1] = FW_PARAM_PFVF(L2T_START);
	params[2] = FW_PARAM_PFVF(L2T_END);
	params[3] = FW_PARAM_PFVF(FILTER_START);
	params[4] = FW_PARAM_PFVF(FILTER_END);
	ret = t4_query_params(adap, adap->fn, adap->fn, 0, 5, params, val);
	if (ret < 0)
		goto bye;
	port_vec = val[0];
	adap->tids.ftid_base = val[3];
	adap->tids.nftids = val[4] - val[3] + 1;

	if (c.ofldcaps) {
		/* query offload-related parameters */
		params[0] = FW_PARAM_DEV(NTID);
		params[1] = FW_PARAM_PFVF(SERVER_START);
		params[2] = FW_PARAM_PFVF(SERVER_END);
		params[3] = FW_PARAM_PFVF(TDDP_START);
		params[4] = FW_PARAM_PFVF(TDDP_END);
		params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
				      val);
		if (ret < 0)
			goto bye;
		adap->tids.ntids = val[0];
		adap->tids.natids = min(adap->tids.ntids / 2, MAX_ATIDS);
		adap->tids.stid_base = val[1];
		adap->tids.nstids = val[2] - val[1] + 1;
		adap->vres.ddp.start = val[3];
		adap->vres.ddp.size = val[4] - val[3] + 1;
		adap->params.ofldq_wr_cred = val[5];
		adap->params.offload = 1;
	}
	if (c.rdmacaps) {
		params[0] = FW_PARAM_PFVF(STAG_START);
		params[1] = FW_PARAM_PFVF(STAG_END);
		params[2] = FW_PARAM_PFVF(RQ_START);
		params[3] = FW_PARAM_PFVF(RQ_END);
		params[4] = FW_PARAM_PFVF(PBL_START);
		params[5] = FW_PARAM_PFVF(PBL_END);
		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
				      val);
		if (ret < 0)
			goto bye;
		adap->vres.stag.start = val[0];
		adap->vres.stag.size = val[1] - val[0] + 1;
		adap->vres.rq.start = val[2];
		adap->vres.rq.size = val[3] - val[2] + 1;
		adap->vres.pbl.start = val[4];
		adap->vres.pbl.size = val[5] - val[4] + 1;

		params[0] = FW_PARAM_PFVF(SQRQ_START);
		params[1] = FW_PARAM_PFVF(SQRQ_END);
		params[2] = FW_PARAM_PFVF(CQ_START);
		params[3] = FW_PARAM_PFVF(CQ_END);
		params[4] = FW_PARAM_PFVF(OCQ_START);
		params[5] = FW_PARAM_PFVF(OCQ_END);
		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
				      val);
		if (ret < 0)
			goto bye;
		adap->vres.qp.start = val[0];
		adap->vres.qp.size = val[1] - val[0] + 1;
		adap->vres.cq.start = val[2];
		adap->vres.cq.size = val[3] - val[2] + 1;
		adap->vres.ocq.start = val[4];
		adap->vres.ocq.size = val[5] - val[4] + 1;
	}
	if (c.iscsicaps) {
		params[0] = FW_PARAM_PFVF(ISCSI_START);
		params[1] = FW_PARAM_PFVF(ISCSI_END);
		ret = t4_query_params(adap, adap->fn, adap->fn, 0, 2, params,
				      val);
		if (ret < 0)
			goto bye;
		adap->vres.iscsi.start = val[0];
		adap->vres.iscsi.size = val[1] - val[0] + 1;
	}
#undef FW_PARAM_PFVF
#undef FW_PARAM_DEV

	adap->params.nports = hweight32(port_vec);
	adap->params.portvec = port_vec;
	adap->flags |= FW_OK;

	/* These are finalized by FW initialization, load their values now */
	v = t4_read_reg(adap, TP_TIMER_RESOLUTION);
	adap->params.tp.tre = TIMERRESOLUTION_GET(v);
	t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
	t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
		     adap->params.b_wnd);

#ifdef CONFIG_PCI_IOV
	/*
	 * Provision resource limits for Virtual Functions.  We currently
	 * grant them all the same static resource limits except for the Port
	 * Access Rights Mask which we're assigning based on the PF.  All of
	 * the static provisioning stuff for both the PF and VF really needs
	 * to be managed in a persistent manner for each device which the
	 * firmware controls.
	 */
	{
		int pf, vf;

		for (pf = 0; pf < ARRAY_SIZE(num_vf); pf++) {
			if (num_vf[pf] <= 0)
				continue;

			/* VF numbering starts at 1! */
			for (vf = 1; vf <= num_vf[pf]; vf++) {
				ret = t4_cfg_pfvf(adap, adap->fn, pf, vf,
						  VFRES_NEQ, VFRES_NETHCTRL,
						  VFRES_NIQFLINT, VFRES_NIQ,
						  VFRES_TC, VFRES_NVI,
						  FW_PFVF_CMD_CMASK_MASK,
						  pfvfres_pmask(adap, pf, vf),
						  VFRES_NEXACTF,
						  VFRES_R_CAPS, VFRES_WX_CAPS);
				if (ret < 0)
					dev_warn(adap->pdev_dev,
						 "failed to provision pf/vf=%d/%d; err=%d\n",
						 pf, vf, ret);
			}
		}
	}
#endif

	setup_memwin(adap);
	return 0;

	/*
	 * If a command timed out or failed with EIO, the FW is not operating
	 * within its spec or something catastrophic happened to HW/FW; stop
	 * issuing commands.
	 */
bye:	if (ret != -ETIMEDOUT && ret != -EIO)
		t4_fw_bye(adap, adap->fn);
	return ret;
}
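/*
 * The FW_PARAM_DEV()/FW_PARAM_PFVF() helpers used above compose the 32-bit
 * identifiers that t4_query_params() sends to the firmware: a mnemonic
 * selecting device-global vs. per-PF/VF scope plus the parameter index,
 * with FW_PARAMS_PARAM_Y() carrying the function for PFVF queries.  For
 * example, FW_PARAM_DEV(PORTVEC) asks which physical ports this function
 * may drive; hweight32() of the returned bitmap gives nports.
 */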
static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
					 pci_channel_state_t state)
{
	int i;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap)
		goto out;

	rtnl_lock();
	adap->flags &= ~FW_OK;
	notify_ulds(adap, CXGB4_STATE_START_RECOVERY);
	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];

		netif_device_detach(dev);
		netif_carrier_off(dev);
	}
	if (adap->flags & FULL_INIT_DONE)
		cxgb_down(adap);
	rtnl_unlock();
	pci_disable_device(pdev);
out:	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}
static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev)
{
	int i, ret;
	struct fw_caps_config_cmd c;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap) {
		pci_restore_state(pdev);
		pci_save_state(pdev);
		return PCI_ERS_RESULT_RECOVERED;
	}

	if (pci_enable_device(pdev)) {
		dev_err(&pdev->dev, "cannot reenable PCI device after reset\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	pci_set_master(pdev);
	pci_restore_state(pdev);
	pci_save_state(pdev);
	pci_cleanup_aer_uncorrect_error_status(pdev);

	if (t4_wait_dev_ready(adap) < 0)
		return PCI_ERS_RESULT_DISCONNECT;
	if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL))
		return PCI_ERS_RESULT_DISCONNECT;
	adap->flags |= FW_OK;
	if (adap_init1(adap, &c))
		return PCI_ERS_RESULT_DISCONNECT;

	for_each_port(adap, i) {
		struct port_info *p = adap2pinfo(adap, i);

		ret = t4_alloc_vi(adap, adap->fn, p->tx_chan, adap->fn, 0, 1,
				  NULL, NULL);
		if (ret < 0)
			return PCI_ERS_RESULT_DISCONNECT;
		p->viid = ret;
		p->xact_addr_filt = -1;
	}

	t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
		     adap->params.b_wnd);
	setup_memwin(adap);
	if (cxgb_up(adap))
		return PCI_ERS_RESULT_DISCONNECT;
	return PCI_ERS_RESULT_RECOVERED;
}
static void eeh_resume(struct pci_dev *pdev)
{
	int i;
	struct adapter *adap = pci_get_drvdata(pdev);

	if (!adap)
		return;

	rtnl_lock();
	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];

		if (netif_running(dev)) {
			link_start(dev);
			cxgb_set_rxmode(dev);
		}
		netif_device_attach(dev);
	}
	rtnl_unlock();
}

static struct pci_error_handlers cxgb4_eeh = {
	.error_detected = eeh_err_detected,
	.slot_reset     = eeh_slot_reset,
	.resume         = eeh_resume,
};
static inline bool is_10g_port(const struct link_config *lc)
{
	return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
}
static inline void init_rspq(struct sge_rspq *q, u8 timer_idx, u8 pkt_cnt_idx,
			     unsigned int size, unsigned int iqe_size)
{
	q->intr_params = QINTR_TIMER_IDX(timer_idx) |
			 (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0);
	q->pktcnt_idx = pkt_cnt_idx < SGE_NCOUNTERS ? pkt_cnt_idx : 0;
	q->iqe_len = iqe_size;
	q->size = size;
}
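/*
 * Example: init_rspq(&q, 6, 0, 512, 64) describes a 512-entry response
 * queue with 64-byte entries using hold-off timer index 6; because the
 * packet-count index 0 is < SGE_NCOUNTERS, the count threshold is enabled
 * too.  Passing a pkt_cnt_idx >= SGE_NCOUNTERS leaves QINTR_CNT_EN clear,
 * disabling the threshold.
 */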
/*
 * Perform default configuration of DMA queues depending on the number and type
 * of ports we found and the number of available CPUs.  Most settings can be
 * modified by the admin prior to actual use.
 */
static void __devinit cfg_queues(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	int i, q10g = 0, n10g = 0, qidx = 0;

	for_each_port(adap, i)
		n10g += is_10g_port(&adap2pinfo(adap, i)->link_cfg);

	/*
	 * We default to 1 queue per non-10G port and up to # of cores queues
	 * per 10G port.
	 */
	if (n10g)
		q10g = (MAX_ETH_QSETS - (adap->params.nports - n10g)) / n10g;
	if (q10g > num_online_cpus())
		q10g = num_online_cpus();

	for_each_port(adap, i) {
		struct port_info *pi = adap2pinfo(adap, i);

		pi->first_qset = qidx;
		pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
		qidx += pi->nqsets;
	}

	s->ethqsets = qidx;
	s->max_ethqsets = qidx;   /* MSI-X may lower it later */

	if (is_offload(adap)) {
		/*
		 * For offload we use 1 queue/channel if all ports are up to 1G,
		 * otherwise we divide all available queues amongst the channels
		 * capped by the number of available cores.
		 */
		if (n10g) {
			i = min_t(int, ARRAY_SIZE(s->ofldrxq),
				  num_online_cpus());
			s->ofldqsets = roundup(i, adap->params.nports);
		} else
			s->ofldqsets = adap->params.nports;
		/* For RDMA one Rx queue per channel suffices */
		s->rdmaqs = adap->params.nports;
	}

	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
		struct sge_eth_rxq *r = &s->ethrxq[i];

		init_rspq(&r->rspq, 0, 0, 1024, 64);
		r->fl.size = 72;
	}

	for (i = 0; i < ARRAY_SIZE(s->ethtxq); i++)
		s->ethtxq[i].q.size = 1024;

	for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++)
		s->ctrlq[i].q.size = 512;

	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
		s->ofldtxq[i].q.size = 1024;

	for (i = 0; i < ARRAY_SIZE(s->ofldrxq); i++) {
		struct sge_ofld_rxq *r = &s->ofldrxq[i];

		init_rspq(&r->rspq, 0, 0, 1024, 64);
		r->rspq.uld = CXGB4_ULD_ISCSI;
		r->fl.size = 72;
	}

	for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
		struct sge_ofld_rxq *r = &s->rdmarxq[i];

		init_rspq(&r->rspq, 0, 0, 511, 64);
		r->rspq.uld = CXGB4_ULD_RDMA;
		r->fl.size = 72;
	}

	init_rspq(&s->fw_evtq, 6, 0, 512, 64);
	init_rspq(&s->intrq, 6, 0, 2 * MAX_INGQ, 64);
}
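/*
 * Worked example for cfg_queues(): with two 10G ports, eight online CPUs,
 * and the driver's MAX_ETH_QSETS of 32, n10g = 2 so q10g = (32 - 0) / 2 =
 * 16, capped at num_online_cpus() = 8.  Each port then gets 8 queue sets
 * and s->max_ethqsets starts at 16; enable_msix() below may trim it if
 * MSI-X vectors run short.
 */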
/*
 * Reduce the number of Ethernet queues across all ports to at most n.
 * n provides at least one queue per port.
 */
static void __devinit reduce_ethqs(struct adapter *adap, int n)
{
	int i;
	struct port_info *pi;

	while (n < adap->sge.ethqsets)
		for_each_port(adap, i) {
			pi = adap2pinfo(adap, i);
			if (pi->nqsets > 1) {
				pi->nqsets--;
				adap->sge.ethqsets--;
				if (adap->sge.ethqsets <= n)
					break;
			}
		}

	n = 0;
	for_each_port(adap, i) {
		pi = adap2pinfo(adap, i);
		pi->first_qset = n;
		n += pi->nqsets;
	}
}
/* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
#define EXTRA_VECS 2

static int __devinit enable_msix(struct adapter *adap)
{
	int ofld_need = 0;
	int i, err, want, need;
	struct sge *s = &adap->sge;
	unsigned int nchan = adap->params.nports;
	struct msix_entry entries[MAX_INGQ + 1];

	for (i = 0; i < ARRAY_SIZE(entries); ++i)
		entries[i].entry = i;

	want = s->max_ethqsets + EXTRA_VECS;
	if (is_offload(adap)) {
		want += s->rdmaqs + s->ofldqsets;
		/* need nchan for each possible ULD */
		ofld_need = 2 * nchan;
	}
	need = adap->params.nports + EXTRA_VECS + ofld_need;

	while ((err = pci_enable_msix(adap->pdev, entries, want)) >= need)
		want = err;

	if (!err) {
		/*
		 * Distribute available vectors to the various queue groups.
		 * Every group gets its minimum requirement and NIC gets top
		 * priority for leftovers.
		 */
		i = want - EXTRA_VECS - ofld_need;
		if (i < s->max_ethqsets) {
			s->max_ethqsets = i;
			if (i < s->ethqsets)
				reduce_ethqs(adap, i);
		}
		if (is_offload(adap)) {
			i = want - EXTRA_VECS - s->max_ethqsets;
			i -= ofld_need - nchan;
			s->ofldqsets = (i / nchan) * nchan;  /* round down */
		}
		for (i = 0; i < want; ++i)
			adap->msix_info[i].vec = entries[i].vector;
	} else if (err > 0)
		dev_info(adap->pdev_dev,
			 "only %d MSI-X vectors left, not using MSI-X\n", err);
	return err;
}
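/*
 * Vector accounting example for enable_msix(), with hypothetical numbers:
 * suppose s->max_ethqsets = 16, s->rdmaqs = 2, and s->ofldqsets = 8 on a
 * two-port offload-capable adapter.  Then want = 16 + EXTRA_VECS + 2 + 8 =
 * 28 and need = 2 + EXTRA_VECS + 2 * nchan = 8; the loop retries with
 * whatever count pci_enable_msix() reports available, as long as it stays
 * >= need, and any shortfall is absorbed by trimming the Ethernet queue
 * sets via reduce_ethqs() after the offload minimum is reserved.
 */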
static int __devinit init_rss(struct adapter *adap)
{
	unsigned int i, j;

	for_each_port(adap, i) {
		struct port_info *pi = adap2pinfo(adap, i);

		pi->rss = kcalloc(pi->rss_size, sizeof(u16), GFP_KERNEL);
		if (!pi->rss)
			return -ENOMEM;
		for (j = 0; j < pi->rss_size; j++)
			pi->rss[j] = j % pi->nqsets;
	}
	return 0;
}
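/*
 * Example: a port with rss_size = 128 and nqsets = 8 gets the table
 * 0, 1, ..., 7, 0, 1, ... so RSS hash buckets are spread round-robin
 * across its queue sets; setup_rss() later writes this table to the VI.
 */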
static void __devinit print_port_info(struct adapter *adap)
{
	static const char *base[] = {
		"R XFI", "R XAUI", "T SGMII", "T XFI", "T XAUI", "KX4", "CX4",
		"KX", "KR", "KR SFP+", "KR FEC"
	};

	int i;
	char buf[80];
	const char *spd = "";

	if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_2_5GB)
		spd = " 2.5 GT/s";
	else if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_5_0GB)
		spd = " 5 GT/s";

	for_each_port(adap, i) {
		struct net_device *dev = adap->port[i];
		const struct port_info *pi = netdev_priv(dev);
		char *bufp = buf;

		if (!test_bit(i, &adap->registered_device_map))
			continue;

		if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100M)
			bufp += sprintf(bufp, "100/");
		if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G)
			bufp += sprintf(bufp, "1000/");
		if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
			bufp += sprintf(bufp, "10G/");
		if (bufp != buf)
			--bufp;
		sprintf(bufp, "BASE-%s", base[pi->port_type]);

		netdev_info(dev, "Chelsio %s rev %d %s %sNIC PCIe x%d%s%s\n",
			    adap->params.vpd.id, adap->params.rev,
			    buf, is_offload(adap) ? "R" : "",
			    adap->params.pci.width, spd,
			    (adap->flags & USING_MSIX) ? " MSI-X" :
			    (adap->flags & USING_MSI) ? " MSI" : "");
		if (adap->name == dev->name)
			netdev_info(dev, "S/N: %s, E/C: %s\n",
				    adap->params.vpd.sn, adap->params.vpd.ec);
	}
}
/*
 * Free the following resources:
 * - memory used for tables
 * - MSI/MSI-X
 * - net devices
 * - resources FW is holding for us
 */
static void free_some_resources(struct adapter *adapter)
{
	unsigned int i;

	t4_free_mem(adapter->l2t);
	t4_free_mem(adapter->tids.tid_tab);
	disable_msi(adapter);

	for_each_port(adapter, i)
		if (adapter->port[i]) {
			kfree(adap2pinfo(adapter, i)->rss);
			free_netdev(adapter->port[i]);
		}
	if (adapter->flags & FW_OK)
		t4_fw_bye(adapter, adapter->fn);
}
#define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
		   NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
static int __devinit init_one(struct pci_dev *pdev,
			      const struct pci_device_id *ent)
{
	int func, i, err;
	struct port_info *pi;
	unsigned int highdma = 0;
	struct adapter *adapter = NULL;

	printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);

	err = pci_request_regions(pdev, KBUILD_MODNAME);
	if (err) {
		/* Just info, some other driver may have claimed the device. */
		dev_info(&pdev->dev, "cannot obtain PCI resources\n");
		return err;
	}

	/* We control everything through one PF */
	func = PCI_FUNC(pdev->devfn);
	if (func != ent->driver_data) {
		pci_save_state(pdev);	/* to restore SR-IOV later */
		goto sriov;
	}

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto out_release_regions;
	}

	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
		highdma = NETIF_F_HIGHDMA;
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (err) {
			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
				"coherent allocations\n");
			goto out_disable_device;
		}
	} else {
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "no usable DMA configuration\n");
			goto out_disable_device;
		}
	}

	pci_enable_pcie_error_reporting(pdev);
	pci_set_master(pdev);
	pci_save_state(pdev);

	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
	if (!adapter) {
		err = -ENOMEM;
		goto out_disable_device;
	}

	adapter->regs = pci_ioremap_bar(pdev, 0);
	if (!adapter->regs) {
		dev_err(&pdev->dev, "cannot map device registers\n");
		err = -ENOMEM;
		goto out_free_adapter;
	}

	adapter->pdev = pdev;
	adapter->pdev_dev = &pdev->dev;
	adapter->fn = func;
	adapter->name = pci_name(pdev);
	adapter->msg_enable = dflt_msg_enable;
	memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));

	spin_lock_init(&adapter->stats_lock);
	spin_lock_init(&adapter->tid_release_lock);

	INIT_WORK(&adapter->tid_release_task, process_tid_release_list);

	err = t4_prep_adapter(adapter);
	if (err)
		goto out_unmap_bar;
	err = adap_init0(adapter);
	if (err)
		goto out_unmap_bar;

	for_each_port(adapter, i) {
		struct net_device *netdev;

		netdev = alloc_etherdev_mq(sizeof(struct port_info),
					   MAX_ETH_QSETS);
		if (!netdev) {
			err = -ENOMEM;
			goto out_free_dev;
		}

		SET_NETDEV_DEV(netdev, &pdev->dev);

		adapter->port[i] = netdev;
		pi = netdev_priv(netdev);
		pi->adapter = adapter;
		pi->xact_addr_filt = -1;
		pi->rx_offload = RX_CSO;
		pi->port_id = i;
		netif_carrier_off(netdev);
		netif_tx_stop_all_queues(netdev);
		netdev->irq = pdev->irq;

		netdev->features |= NETIF_F_SG | TSO_FLAGS;
		netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
		netdev->features |= NETIF_F_GRO | NETIF_F_RXHASH | highdma;
		netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
		netdev->vlan_features = netdev->features & VLAN_FEAT;

		netdev->netdev_ops = &cxgb4_netdev_ops;
		SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
	}

	pci_set_drvdata(pdev, adapter);

	if (adapter->flags & FW_OK) {
		err = t4_port_init(adapter, func, func, 0);
		if (err)
			goto out_free_dev;
	}

	/*
	 * Configure queues and allocate tables now, they can be needed as
	 * soon as the first register_netdev completes.
	 */
	cfg_queues(adapter);

	adapter->l2t = t4_init_l2t();
	if (!adapter->l2t) {
		/* We tolerate a lack of L2T, giving up some functionality */
		dev_warn(&pdev->dev, "could not allocate L2T, continuing\n");
		adapter->params.offload = 0;
	}

	if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
		dev_warn(&pdev->dev,
			 "could not allocate TID table, continuing\n");
		adapter->params.offload = 0;
	}

	/* See what interrupts we'll be using */
	if (msi > 1 && enable_msix(adapter) == 0)
		adapter->flags |= USING_MSIX;
	else if (msi > 0 && pci_enable_msi(pdev) == 0)
		adapter->flags |= USING_MSI;

	err = init_rss(adapter);
	if (err)
		goto out_free_dev;

	/*
	 * The card is now ready to go.  If any errors occur during device
	 * registration we do not fail the whole card but rather proceed only
	 * with the ports we manage to register successfully.  However we must
	 * register at least one net device.
	 */
	for_each_port(adapter, i) {
		err = register_netdev(adapter->port[i]);
		if (err)
			dev_warn(&pdev->dev,
				 "cannot register net device %s, skipping\n",
				 adapter->port[i]->name);
		else {
			/*
			 * Change the name we use for messages to the name of
			 * the first successfully registered interface.
			 */
			if (!adapter->registered_device_map)
				adapter->name = adapter->port[i]->name;

			__set_bit(i, &adapter->registered_device_map);
			adapter->chan_map[adap2pinfo(adapter, i)->tx_chan] = i;
		}
	}
	if (!adapter->registered_device_map) {
		dev_err(&pdev->dev, "could not register any net devices\n");
		goto out_free_dev;
	}

	if (cxgb4_debugfs_root) {
		adapter->debugfs_root = debugfs_create_dir(pci_name(pdev),
							   cxgb4_debugfs_root);
		setup_debugfs(adapter);
	}

	if (is_offload(adapter))
		attach_ulds(adapter);

	print_port_info(adapter);

sriov:
#ifdef CONFIG_PCI_IOV
	if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0)
		if (pci_enable_sriov(pdev, num_vf[func]) == 0)
			dev_info(&pdev->dev,
				 "instantiated %u virtual functions\n",
				 num_vf[func]);
#endif
	return 0;

 out_free_dev:
	free_some_resources(adapter);
 out_unmap_bar:
	iounmap(adapter->regs);
 out_free_adapter:
	kfree(adapter);
 out_disable_device:
	pci_disable_pcie_error_reporting(pdev);
	pci_disable_device(pdev);
 out_release_regions:
	pci_release_regions(pdev);
	pci_set_drvdata(pdev, NULL);
	return err;
}
static void __devexit remove_one(struct pci_dev *pdev)
{
	struct adapter *adapter = pci_get_drvdata(pdev);

	pci_disable_sriov(pdev);

	if (adapter) {
		int i;

		if (is_offload(adapter))
			detach_ulds(adapter);

		for_each_port(adapter, i)
			if (test_bit(i, &adapter->registered_device_map))
				unregister_netdev(adapter->port[i]);

		if (adapter->debugfs_root)
			debugfs_remove_recursive(adapter->debugfs_root);

		if (adapter->flags & FULL_INIT_DONE)
			cxgb_down(adapter);

		free_some_resources(adapter);
		iounmap(adapter->regs);
		kfree(adapter);
		pci_disable_pcie_error_reporting(pdev);
		pci_disable_device(pdev);
		pci_release_regions(pdev);
		pci_set_drvdata(pdev, NULL);
	} else if (PCI_FUNC(pdev->devfn) > 0)
		pci_release_regions(pdev);
}
static struct pci_driver cxgb4_driver = {
	.name     = KBUILD_MODNAME,
	.id_table = cxgb4_pci_tbl,
	.probe    = init_one,
	.remove   = __devexit_p(remove_one),
	.err_handler = &cxgb4_eeh,
};
static int __init cxgb4_init_module(void)
{
	int ret;

	/* Debugfs support is optional, just warn if this fails */
	cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
	if (!cxgb4_debugfs_root)
		pr_warning("could not create debugfs entry, continuing\n");

	ret = pci_register_driver(&cxgb4_driver);
	if (ret < 0)
		debugfs_remove(cxgb4_debugfs_root);
	return ret;
}

static void __exit cxgb4_cleanup_module(void)
{
	pci_unregister_driver(&cxgb4_driver);
	debugfs_remove(cxgb4_debugfs_root);  /* NULL ok */
}

module_init(cxgb4_init_module);
module_exit(cxgb4_cleanup_module);