net: drop the weight argument from netif_napi_add
[linux-2.6-block.git] drivers/net/ethernet/emulex/benet/be_main.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005 - 2016 Broadcom
4  * All rights reserved.
5  *
6  * Contact Information:
7  * linux-drivers@emulex.com
8  *
9  * Emulex
10  * 3333 Susan Street
11  * Costa Mesa, CA 92626
12  */
13
14 #include <linux/prefetch.h>
15 #include <linux/module.h>
16 #include "be.h"
17 #include "be_cmds.h"
18 #include <asm/div64.h>
19 #include <linux/aer.h>
20 #include <linux/if_bridge.h>
21 #include <net/busy_poll.h>
22 #include <net/vxlan.h>
23
24 MODULE_DESCRIPTION(DRV_DESC);
25 MODULE_AUTHOR("Emulex Corporation");
26 MODULE_LICENSE("GPL");
27
28 /* num_vfs module param is obsolete.
29  * Use sysfs method to enable/disable VFs.
30  */
31 static unsigned int num_vfs;
32 module_param(num_vfs, uint, 0444);
33 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
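/* The sysfs method referred to above is the standard PCI SR-IOV interface,
 * typically: echo <N> > /sys/bus/pci/devices/<domain:bus:dev.fn>/sriov_numvfs
 * (write 0 to disable). The exact device path depends on the system.
 */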
34
35 static ushort rx_frag_size = 2048;
36 module_param(rx_frag_size, ushort, 0444);
37 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
38
39 /* Per-module error detection/recovery workq shared across all functions.
40  * Each function schedules its own work request on this shared workq.
41  */
42 static struct workqueue_struct *be_err_recovery_workq;
43
44 static const struct pci_device_id be_dev_ids[] = {
45 #ifdef CONFIG_BE2NET_BE2
46         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
47         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48 #endif /* CONFIG_BE2NET_BE2 */
49 #ifdef CONFIG_BE2NET_BE3
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
51         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
52 #endif /* CONFIG_BE2NET_BE3 */
53 #ifdef CONFIG_BE2NET_LANCER
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 #endif /* CONFIG_BE2NET_LANCER */
57 #ifdef CONFIG_BE2NET_SKYHAWK
58         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
59         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
60 #endif /* CONFIG_BE2NET_SKYHAWK */
61         { 0 }
62 };
63 MODULE_DEVICE_TABLE(pci, be_dev_ids);
64
65 /* Workqueue used by all functions for deferring cmd calls to the adapter */
66 static struct workqueue_struct *be_wq;
67
68 /* UE Status Low CSR */
69 static const char * const ue_status_low_desc[] = {
70         "CEV",
71         "CTX",
72         "DBUF",
73         "ERX",
74         "Host",
75         "MPU",
76         "NDMA",
77         "PTC ",
78         "RDMA ",
79         "RXF ",
80         "RXIPS ",
81         "RXULP0 ",
82         "RXULP1 ",
83         "RXULP2 ",
84         "TIM ",
85         "TPOST ",
86         "TPRE ",
87         "TXIPS ",
88         "TXULP0 ",
89         "TXULP1 ",
90         "UC ",
91         "WDMA ",
92         "TXULP2 ",
93         "HOST1 ",
94         "P0_OB_LINK ",
95         "P1_OB_LINK ",
96         "HOST_GPIO ",
97         "MBOX ",
98         "ERX2 ",
99         "SPARE ",
100         "JTAG ",
101         "MPU_INTPEND "
102 };
103
104 /* UE Status High CSR */
105 static const char * const ue_status_hi_desc[] = {
106         "LPCMEMHOST",
107         "MGMT_MAC",
108         "PCS0ONLINE",
109         "MPU_IRAM",
110         "PCS1ONLINE",
111         "PCTL0",
112         "PCTL1",
113         "PMEM",
114         "RR",
115         "TXPB",
116         "RXPP",
117         "XAUI",
118         "TXP",
119         "ARM",
120         "IPC",
121         "HOST2",
122         "HOST3",
123         "HOST4",
124         "HOST5",
125         "HOST6",
126         "HOST7",
127         "ECRC",
128         "Poison TLP",
129         "NETC",
130         "PERIPH",
131         "LLTXULP",
132         "D2P",
133         "RCON",
134         "LDMA",
135         "LLTXP",
136         "LLTXPB",
137         "Unknown"
138 };
139
140 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
141                                  BE_IF_FLAGS_BROADCAST | \
142                                  BE_IF_FLAGS_MULTICAST | \
143                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
144
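/* Ring-memory helpers: be_queue_alloc() carves a coherent DMA buffer of
 * len * entry_size bytes for a queue ring and records its geometry in the
 * be_queue_info; be_queue_free() releases it and is safe to call when
 * nothing was allocated, since mem->va is checked first.
 */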
145 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
146 {
147         struct be_dma_mem *mem = &q->dma_mem;
148
149         if (mem->va) {
150                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
151                                   mem->dma);
152                 mem->va = NULL;
153         }
154 }
155
156 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
157                           u16 len, u16 entry_size)
158 {
159         struct be_dma_mem *mem = &q->dma_mem;
160
161         memset(q, 0, sizeof(*q));
162         q->len = len;
163         q->entry_size = entry_size;
164         mem->size = len * entry_size;
165         mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
166                                      &mem->dma, GFP_KERNEL);
167         if (!mem->va)
168                 return -ENOMEM;
169         return 0;
170 }
171
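/* Toggle host interrupts via the HOSTINTR bit of the MEMBAR control register,
 * accessed through PCI config space. Used by be_intr_set() below as a
 * fallback when the FW command fails.
 */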
172 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
173 {
174         u32 reg, enabled;
175
176         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
177                               &reg);
178         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179
180         if (!enabled && enable)
181                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
182         else if (enabled && !enable)
183                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
184         else
185                 return;
186
187         pci_write_config_dword(adapter->pdev,
188                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
189 }
190
191 static void be_intr_set(struct be_adapter *adapter, bool enable)
192 {
193         int status = 0;
194
195         /* On Lancer, interrupts can't be controlled via this register */
196         if (lancer_chip(adapter))
197                 return;
198
199         if (be_check_error(adapter, BE_ERROR_EEH))
200                 return;
201
202         status = be_cmd_intr_set(adapter, enable);
203         if (status)
204                 be_reg_intr_set(adapter, enable);
205 }
206
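/* Doorbell helpers: each notify routine encodes the queue id plus a count
 * (entries posted, or completions/events popped) into one 32-bit value and
 * writes it to the queue's doorbell offset in the adapter's BAR. The wmb()
 * in the RQ/TXQ paths orders descriptor updates before the doorbell write.
 */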
207 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
208 {
209         u32 val = 0;
210
211         if (be_check_error(adapter, BE_ERROR_HW))
212                 return;
213
214         val |= qid & DB_RQ_RING_ID_MASK;
215         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
216
217         wmb();
218         iowrite32(val, adapter->db + DB_RQ_OFFSET);
219 }
220
221 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
222                           u16 posted)
223 {
224         u32 val = 0;
225
226         if (be_check_error(adapter, BE_ERROR_HW))
227                 return;
228
229         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
230         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
231
232         wmb();
233         iowrite32(val, adapter->db + txo->db_offset);
234 }
235
236 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
237                          bool arm, bool clear_int, u16 num_popped,
238                          u32 eq_delay_mult_enc)
239 {
240         u32 val = 0;
241
242         val |= qid & DB_EQ_RING_ID_MASK;
243         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
244
245         if (be_check_error(adapter, BE_ERROR_HW))
246                 return;
247
248         if (arm)
249                 val |= 1 << DB_EQ_REARM_SHIFT;
250         if (clear_int)
251                 val |= 1 << DB_EQ_CLR_SHIFT;
252         val |= 1 << DB_EQ_EVNT_SHIFT;
253         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
254         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
255         iowrite32(val, adapter->db + DB_EQ_OFFSET);
256 }
257
258 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
259 {
260         u32 val = 0;
261
262         val |= qid & DB_CQ_RING_ID_MASK;
263         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
264                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
265
266         if (be_check_error(adapter, BE_ERROR_HW))
267                 return;
268
269         if (arm)
270                 val |= 1 << DB_CQ_REARM_SHIFT;
271         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
272         iowrite32(val, adapter->db + DB_CQ_OFFSET);
273 }
274
275 static int be_dev_mac_add(struct be_adapter *adapter, const u8 *mac)
276 {
277         int i;
278
279         /* Check if mac has already been added as part of uc-list */
280         for (i = 0; i < adapter->uc_macs; i++) {
281                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
282                         /* mac already added, skip addition */
283                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
284                         return 0;
285                 }
286         }
287
288         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
289                                &adapter->pmac_id[0], 0);
290 }
291
292 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
293 {
294         int i;
295
296         /* Skip deletion if the programmed mac is
297          * being used in uc-list
298          */
299         for (i = 0; i < adapter->uc_macs; i++) {
300                 if (adapter->pmac_id[i + 1] == pmac_id)
301                         return;
302         }
303         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
304 }
305
306 static int be_mac_addr_set(struct net_device *netdev, void *p)
307 {
308         struct be_adapter *adapter = netdev_priv(netdev);
309         struct device *dev = &adapter->pdev->dev;
310         struct sockaddr *addr = p;
311         int status;
312         u8 mac[ETH_ALEN];
313         u32 old_pmac_id = adapter->pmac_id[0];
314
315         if (!is_valid_ether_addr(addr->sa_data))
316                 return -EADDRNOTAVAIL;
317
318         /* Proceed further only if the user-provided MAC is different
319          * from the active MAC
320          */
321         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
322                 return 0;
323
324         /* BE3 VFs without the FILTMGMT privilege are not allowed to set
325          * their MAC address
326          */
327         if (BEx_chip(adapter) && be_virtfn(adapter) &&
328             !check_privilege(adapter, BE_PRIV_FILTMGMT))
329                 return -EPERM;
330
331         /* if device is not running, copy MAC to netdev->dev_addr */
332         if (!netif_running(netdev))
333                 goto done;
334
335         /* The PMAC_ADD cmd may fail if the VF doesn't have the FILTMGMT
336          * privilege or if the PF did not provision the new MAC address.
337          * On BE3, this cmd will always fail if the VF doesn't have the
338          * FILTMGMT privilege. That failure is OK only if the PF has
339          * programmed the MAC for the VF.
340          */
341         mutex_lock(&adapter->rx_filter_lock);
342         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
343         if (!status) {
344
345                 /* Delete the old programmed MAC. This call may fail if the
346                  * old MAC was already deleted by the PF driver.
347                  */
348                 if (adapter->pmac_id[0] != old_pmac_id)
349                         be_dev_mac_del(adapter, old_pmac_id);
350         }
351
352         mutex_unlock(&adapter->rx_filter_lock);
353         /* Decide if the new MAC is successfully activated only after
354          * querying the FW
355          */
356         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
357                                        adapter->if_handle, true, 0);
358         if (status)
359                 goto err;
360
361         /* The MAC change did not happen, either due to lack of privilege
362          * or because the PF didn't pre-provision the MAC.
363          */
364         if (!ether_addr_equal(addr->sa_data, mac)) {
365                 status = -EPERM;
366                 goto err;
367         }
368
369         /* Remember currently programmed MAC */
370         ether_addr_copy(adapter->dev_mac, addr->sa_data);
371 done:
372         eth_hw_addr_set(netdev, addr->sa_data);
373         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
374         return 0;
375 err:
376         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
377         return status;
378 }
379
380 /* BE2 supports only v0 cmd */
381 static void *hw_stats_from_cmd(struct be_adapter *adapter)
382 {
383         if (BE2_chip(adapter)) {
384                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
385
386                 return &cmd->hw_stats;
387         } else if (BE3_chip(adapter)) {
388                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
389
390                 return &cmd->hw_stats;
391         } else {
392                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
393
394                 return &cmd->hw_stats;
395         }
396 }
397
398 /* BE2 supports only v0 cmd */
399 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
400 {
401         if (BE2_chip(adapter)) {
402                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
403
404                 return &hw_stats->erx;
405         } else if (BE3_chip(adapter)) {
406                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
407
408                 return &hw_stats->erx;
409         } else {
410                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
411
412                 return &hw_stats->erx;
413         }
414 }
415
416 static void populate_be_v0_stats(struct be_adapter *adapter)
417 {
418         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
419         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
420         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
421         struct be_port_rxf_stats_v0 *port_stats =
422                                         &rxf_stats->port[adapter->port_num];
423         struct be_drv_stats *drvs = &adapter->drv_stats;
424
425         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
426         drvs->rx_pause_frames = port_stats->rx_pause_frames;
427         drvs->rx_crc_errors = port_stats->rx_crc_errors;
428         drvs->rx_control_frames = port_stats->rx_control_frames;
429         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
430         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
431         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
432         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
433         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
434         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
435         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
436         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
437         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
438         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
439         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
440         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
441         drvs->rx_dropped_header_too_small =
442                 port_stats->rx_dropped_header_too_small;
443         drvs->rx_address_filtered =
444                                         port_stats->rx_address_filtered +
445                                         port_stats->rx_vlan_filtered;
446         drvs->rx_alignment_symbol_errors =
447                 port_stats->rx_alignment_symbol_errors;
448
449         drvs->tx_pauseframes = port_stats->tx_pauseframes;
450         drvs->tx_controlframes = port_stats->tx_controlframes;
451
452         if (adapter->port_num)
453                 drvs->jabber_events = rxf_stats->port1_jabber_events;
454         else
455                 drvs->jabber_events = rxf_stats->port0_jabber_events;
456         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
457         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
458         drvs->forwarded_packets = rxf_stats->forwarded_packets;
459         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
460         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
461         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
462         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
463 }
464
465 static void populate_be_v1_stats(struct be_adapter *adapter)
466 {
467         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
468         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
469         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
470         struct be_port_rxf_stats_v1 *port_stats =
471                                         &rxf_stats->port[adapter->port_num];
472         struct be_drv_stats *drvs = &adapter->drv_stats;
473
474         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
475         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
476         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
477         drvs->rx_pause_frames = port_stats->rx_pause_frames;
478         drvs->rx_crc_errors = port_stats->rx_crc_errors;
479         drvs->rx_control_frames = port_stats->rx_control_frames;
480         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
481         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
482         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
483         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
484         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
485         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
486         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
487         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
488         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
489         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
490         drvs->rx_dropped_header_too_small =
491                 port_stats->rx_dropped_header_too_small;
492         drvs->rx_input_fifo_overflow_drop =
493                 port_stats->rx_input_fifo_overflow_drop;
494         drvs->rx_address_filtered = port_stats->rx_address_filtered;
495         drvs->rx_alignment_symbol_errors =
496                 port_stats->rx_alignment_symbol_errors;
497         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
498         drvs->tx_pauseframes = port_stats->tx_pauseframes;
499         drvs->tx_controlframes = port_stats->tx_controlframes;
500         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
501         drvs->jabber_events = port_stats->jabber_events;
502         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
503         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
504         drvs->forwarded_packets = rxf_stats->forwarded_packets;
505         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
506         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
507         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
508         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
509 }
510
511 static void populate_be_v2_stats(struct be_adapter *adapter)
512 {
513         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
514         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
515         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
516         struct be_port_rxf_stats_v2 *port_stats =
517                                         &rxf_stats->port[adapter->port_num];
518         struct be_drv_stats *drvs = &adapter->drv_stats;
519
520         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
521         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
522         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
523         drvs->rx_pause_frames = port_stats->rx_pause_frames;
524         drvs->rx_crc_errors = port_stats->rx_crc_errors;
525         drvs->rx_control_frames = port_stats->rx_control_frames;
526         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
527         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
528         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
529         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
530         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
531         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
532         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
533         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
534         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
535         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
536         drvs->rx_dropped_header_too_small =
537                 port_stats->rx_dropped_header_too_small;
538         drvs->rx_input_fifo_overflow_drop =
539                 port_stats->rx_input_fifo_overflow_drop;
540         drvs->rx_address_filtered = port_stats->rx_address_filtered;
541         drvs->rx_alignment_symbol_errors =
542                 port_stats->rx_alignment_symbol_errors;
543         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
544         drvs->tx_pauseframes = port_stats->tx_pauseframes;
545         drvs->tx_controlframes = port_stats->tx_controlframes;
546         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
547         drvs->jabber_events = port_stats->jabber_events;
548         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
549         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
550         drvs->forwarded_packets = rxf_stats->forwarded_packets;
551         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
552         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
553         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
554         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
555         if (be_roce_supported(adapter)) {
556                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
557                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
558                 drvs->rx_roce_frames = port_stats->roce_frames_received;
559                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
560                 drvs->roce_drops_payload_len =
561                         port_stats->roce_drops_payload_len;
562         }
563 }
564
565 static void populate_lancer_stats(struct be_adapter *adapter)
566 {
567         struct be_drv_stats *drvs = &adapter->drv_stats;
568         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
569
570         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
571         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
572         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
573         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
574         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
575         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
576         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
577         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
578         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
579         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
580         drvs->rx_dropped_tcp_length =
581                                 pport_stats->rx_dropped_invalid_tcp_length;
582         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
583         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
584         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
585         drvs->rx_dropped_header_too_small =
586                                 pport_stats->rx_dropped_header_too_small;
587         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
588         drvs->rx_address_filtered =
589                                         pport_stats->rx_address_filtered +
590                                         pport_stats->rx_vlan_filtered;
591         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
592         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
593         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
594         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
595         drvs->jabber_events = pport_stats->rx_jabbers;
596         drvs->forwarded_packets = pport_stats->num_forwards_lo;
597         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
598         drvs->rx_drops_too_many_frags =
599                                 pport_stats->rx_drops_too_many_frags_lo;
600 }
601
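/* Fold a 16-bit HW counter into a 32-bit accumulator, handling wrap-around.
 * Worked example: if *acc == 0x0001fff0 and the HW now reports val == 5,
 * the counter wrapped, so newacc = 0x00010000 + 5 + 65536 = 0x00020005.
 */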
602 static void accumulate_16bit_val(u32 *acc, u16 val)
603 {
604 #define lo(x)                   (x & 0xFFFF)
605 #define hi(x)                   (x & 0xFFFF0000)
606         bool wrapped = val < lo(*acc);
607         u32 newacc = hi(*acc) + val;
608
609         if (wrapped)
610                 newacc += 65536;
611         WRITE_ONCE(*acc, newacc);
612 }
613
614 static void populate_erx_stats(struct be_adapter *adapter,
615                                struct be_rx_obj *rxo, u32 erx_stat)
616 {
617         if (!BEx_chip(adapter))
618                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
619         else
620                 /* The erx HW counter below can wrap around after 65535;
621                  * the driver accumulates it into a 32-bit value.
622                  */
623                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
624                                      (u16)erx_stat);
625 }
626
627 void be_parse_stats(struct be_adapter *adapter)
628 {
629         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
630         struct be_rx_obj *rxo;
631         int i;
632         u32 erx_stat;
633
634         if (lancer_chip(adapter)) {
635                 populate_lancer_stats(adapter);
636         } else {
637                 if (BE2_chip(adapter))
638                         populate_be_v0_stats(adapter);
639                 else if (BE3_chip(adapter))
640                         /* for BE3 */
641                         populate_be_v1_stats(adapter);
642                 else
643                         populate_be_v2_stats(adapter);
644
645                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
646                 for_all_rx_queues(adapter, rxo, i) {
647                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
648                         populate_erx_stats(adapter, rxo, erx_stat);
649                 }
650         }
651 }
652
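/* Aggregate per-queue SW counters into rtnl_link_stats64. The
 * u64_stats_fetch_begin_irq()/retry_irq() pair re-reads a queue's counters
 * if a writer updated them concurrently; this matters on 32-bit hosts where
 * 64-bit counters cannot be read atomically.
 */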
653 static void be_get_stats64(struct net_device *netdev,
654                            struct rtnl_link_stats64 *stats)
655 {
656         struct be_adapter *adapter = netdev_priv(netdev);
657         struct be_drv_stats *drvs = &adapter->drv_stats;
658         struct be_rx_obj *rxo;
659         struct be_tx_obj *txo;
660         u64 pkts, bytes;
661         unsigned int start;
662         int i;
663
664         for_all_rx_queues(adapter, rxo, i) {
665                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
666
667                 do {
668                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
669                         pkts = rx_stats(rxo)->rx_pkts;
670                         bytes = rx_stats(rxo)->rx_bytes;
671                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
672                 stats->rx_packets += pkts;
673                 stats->rx_bytes += bytes;
674                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
675                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
676                                         rx_stats(rxo)->rx_drops_no_frags;
677         }
678
679         for_all_tx_queues(adapter, txo, i) {
680                 const struct be_tx_stats *tx_stats = tx_stats(txo);
681
682                 do {
683                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
684                         pkts = tx_stats(txo)->tx_pkts;
685                         bytes = tx_stats(txo)->tx_bytes;
686                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
687                 stats->tx_packets += pkts;
688                 stats->tx_bytes += bytes;
689         }
690
691         /* bad pkts received */
692         stats->rx_errors = drvs->rx_crc_errors +
693                 drvs->rx_alignment_symbol_errors +
694                 drvs->rx_in_range_errors +
695                 drvs->rx_out_range_errors +
696                 drvs->rx_frame_too_long +
697                 drvs->rx_dropped_too_small +
698                 drvs->rx_dropped_too_short +
699                 drvs->rx_dropped_header_too_small +
700                 drvs->rx_dropped_tcp_length +
701                 drvs->rx_dropped_runt;
702
703         /* detailed rx errors */
704         stats->rx_length_errors = drvs->rx_in_range_errors +
705                 drvs->rx_out_range_errors +
706                 drvs->rx_frame_too_long;
707
708         stats->rx_crc_errors = drvs->rx_crc_errors;
709
710         /* frame alignment errors */
711         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
712
713         /* receiver fifo overrun */
714         /* drops_no_pbuf is not per i/f, it's per BE card */
715         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
716                                 drvs->rx_input_fifo_overflow_drop +
717                                 drvs->rx_drops_no_pbuf;
718 }
719
720 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
721 {
722         struct net_device *netdev = adapter->netdev;
723
724         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
725                 netif_carrier_off(netdev);
726                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
727         }
728
729         if (link_status)
730                 netif_carrier_on(netdev);
731         else
732                 netif_carrier_off(netdev);
733
734         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
735 }
736
737 static int be_gso_hdr_len(struct sk_buff *skb)
738 {
739         if (skb->encapsulation)
740                 return skb_inner_tcp_all_headers(skb);
741
742         return skb_tcp_all_headers(skb);
743 }
744
745 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
746 {
747         struct be_tx_stats *stats = tx_stats(txo);
748         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
749         /* Account for headers which get duplicated in TSO pkt */
750         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
751
752         u64_stats_update_begin(&stats->sync);
753         stats->tx_reqs++;
754         stats->tx_bytes += skb->len + dup_hdr_len;
755         stats->tx_pkts += tx_pkts;
756         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
757                 stats->tx_vxlan_offload_pkts += tx_pkts;
758         u64_stats_update_end(&stats->sync);
759 }
760
761 /* Returns number of WRBs needed for the skb */
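/* e.g. an skb with linear data and two page frags needs 1 + 1 + 2 = 4 WRBs */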
762 static u32 skb_wrb_cnt(struct sk_buff *skb)
763 {
764         /* +1 for the header wrb */
765         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
766 }
767
768 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
769 {
770         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
771         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
772         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
773         wrb->rsvd0 = 0;
774 }
775
776 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
777  * to avoid the swap and shift/mask operations in wrb_fill().
778  */
779 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
780 {
781         wrb->frag_pa_hi = 0;
782         wrb->frag_pa_lo = 0;
783         wrb->frag_len = 0;
784         wrb->rsvd0 = 0;
785 }
786
787 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
788                                      struct sk_buff *skb)
789 {
790         u8 vlan_prio;
791         u16 vlan_tag;
792
793         vlan_tag = skb_vlan_tag_get(skb);
794         vlan_prio = skb_vlan_tag_get_prio(skb);
795         /* If vlan priority provided by OS is NOT in available bmap */
796         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
797                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
798                                 adapter->recommended_prio_bits;
799
800         return vlan_tag;
801 }
802
803 /* Used only for IP tunnel packets */
804 static u16 skb_inner_ip_proto(struct sk_buff *skb)
805 {
806         return (inner_ip_hdr(skb)->version == 4) ?
807                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
808 }
809
810 static u16 skb_ip_proto(struct sk_buff *skb)
811 {
812         return (ip_hdr(skb)->version == 4) ?
813                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
814 }
815
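/* TXQ flow control: the queue is considered full once a worst-case packet
 * (BE_MAX_TX_FRAG_COUNT WRBs) might no longer fit, and is woken again only
 * after it drains below half its length, giving stop/wake hysteresis.
 */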
816 static inline bool be_is_txq_full(struct be_tx_obj *txo)
817 {
818         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
819 }
820
821 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
822 {
823         return atomic_read(&txo->q.used) < txo->q.len / 2;
824 }
825
826 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
827 {
828         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
829 }
830
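/* Derive the TX WRB feature flags from the skb: LSO/LSO6 and the MSS for GSO
 * packets, TCP/UDP checksum-offload bits (using the inner IP header for
 * encapsulated packets) for CHECKSUM_PARTIAL, and the VLAN tag if present.
 */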
831 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
832                                        struct sk_buff *skb,
833                                        struct be_wrb_params *wrb_params)
834 {
835         u16 proto;
836
837         if (skb_is_gso(skb)) {
838                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
839                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
840                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
841                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
842         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
843                 if (skb->encapsulation) {
844                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
845                         proto = skb_inner_ip_proto(skb);
846                 } else {
847                         proto = skb_ip_proto(skb);
848                 }
849                 if (proto == IPPROTO_TCP)
850                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
851                 else if (proto == IPPROTO_UDP)
852                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
853         }
854
855         if (skb_vlan_tag_present(skb)) {
856                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
857                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
858         }
859
860         BE_WRB_F_SET(wrb_params->features, CRC, 1);
861 }
862
863 static void wrb_fill_hdr(struct be_adapter *adapter,
864                          struct be_eth_hdr_wrb *hdr,
865                          struct be_wrb_params *wrb_params,
866                          struct sk_buff *skb)
867 {
868         memset(hdr, 0, sizeof(*hdr));
869
870         SET_TX_WRB_HDR_BITS(crc, hdr,
871                             BE_WRB_F_GET(wrb_params->features, CRC));
872         SET_TX_WRB_HDR_BITS(ipcs, hdr,
873                             BE_WRB_F_GET(wrb_params->features, IPCS));
874         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
875                             BE_WRB_F_GET(wrb_params->features, TCPCS));
876         SET_TX_WRB_HDR_BITS(udpcs, hdr,
877                             BE_WRB_F_GET(wrb_params->features, UDPCS));
878
879         SET_TX_WRB_HDR_BITS(lso, hdr,
880                             BE_WRB_F_GET(wrb_params->features, LSO));
881         SET_TX_WRB_HDR_BITS(lso6, hdr,
882                             BE_WRB_F_GET(wrb_params->features, LSO6));
883         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
884
885         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
886          * hack is not needed, the evt bit is set while ringing the DB.
887          */
888         SET_TX_WRB_HDR_BITS(event, hdr,
889                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
890         SET_TX_WRB_HDR_BITS(vlan, hdr,
891                             BE_WRB_F_GET(wrb_params->features, VLAN));
892         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
893
894         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
895         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
896         SET_TX_WRB_HDR_BITS(mgmt, hdr,
897                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
898 }
899
900 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
901                           bool unmap_single)
902 {
903         dma_addr_t dma;
904         u32 frag_len = le32_to_cpu(wrb->frag_len);
905
906
907         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
908                 (u64)le32_to_cpu(wrb->frag_pa_lo);
909         if (frag_len) {
910                 if (unmap_single)
911                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
912                 else
913                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
914         }
915 }
916
917 /* Grab a WRB header for xmit */
918 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
919 {
920         u32 head = txo->q.head;
921
922         queue_head_inc(&txo->q);
923         return head;
924 }
925
926 /* Set up the WRB header for xmit */
927 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
928                                 struct be_tx_obj *txo,
929                                 struct be_wrb_params *wrb_params,
930                                 struct sk_buff *skb, u16 head)
931 {
932         u32 num_frags = skb_wrb_cnt(skb);
933         struct be_queue_info *txq = &txo->q;
934         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
935
936         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
937         be_dws_cpu_to_le(hdr, sizeof(*hdr));
938
939         BUG_ON(txo->sent_skb_list[head]);
940         txo->sent_skb_list[head] = skb;
941         txo->last_req_hdr = head;
942         atomic_add(num_frags, &txq->used);
943         txo->last_req_wrb_cnt = num_frags;
944         txo->pend_wrb_cnt += num_frags;
945 }
946
947 /* Setup a WRB fragment (buffer descriptor) for xmit */
948 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
949                                  int len)
950 {
951         struct be_eth_wrb *wrb;
952         struct be_queue_info *txq = &txo->q;
953
954         wrb = queue_head_node(txq);
955         wrb_fill(wrb, busaddr, len);
956         queue_head_inc(txq);
957 }
958
959 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
960  * was invoked. The producer index is restored to the previous packet and the
961  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
962  */
963 static void be_xmit_restore(struct be_adapter *adapter,
964                             struct be_tx_obj *txo, u32 head, bool map_single,
965                             u32 copied)
966 {
967         struct device *dev;
968         struct be_eth_wrb *wrb;
969         struct be_queue_info *txq = &txo->q;
970
971         dev = &adapter->pdev->dev;
972         txq->head = head;
973
974         /* skip the first wrb (hdr); it's not mapped */
975         queue_head_inc(txq);
976         while (copied) {
977                 wrb = queue_head_node(txq);
978                 unmap_tx_frag(dev, wrb, map_single);
979                 map_single = false;
980                 copied -= le32_to_cpu(wrb->frag_len);
981                 queue_head_inc(txq);
982         }
983
984         txq->head = head;
985 }
986
987 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
988  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
989  * of WRBs used up by the packet.
990  */
991 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
992                            struct sk_buff *skb,
993                            struct be_wrb_params *wrb_params)
994 {
995         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
996         struct device *dev = &adapter->pdev->dev;
997         bool map_single = false;
998         u32 head;
999         dma_addr_t busaddr;
1000         int len;
1001
1002         head = be_tx_get_wrb_hdr(txo);
1003
1004         if (skb->len > skb->data_len) {
1005                 len = skb_headlen(skb);
1006
1007                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1008                 if (dma_mapping_error(dev, busaddr))
1009                         goto dma_err;
1010                 map_single = true;
1011                 be_tx_setup_wrb_frag(txo, busaddr, len);
1012                 copied += len;
1013         }
1014
1015         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1016                 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1017                 len = skb_frag_size(frag);
1018
1019                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1020                 if (dma_mapping_error(dev, busaddr))
1021                         goto dma_err;
1022                 be_tx_setup_wrb_frag(txo, busaddr, len);
1023                 copied += len;
1024         }
1025
1026         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1027
1028         be_tx_stats_update(txo, skb);
1029         return wrb_cnt;
1030
1031 dma_err:
1032         adapter->drv_stats.dma_map_errors++;
1033         be_xmit_restore(adapter, txo, head, map_single, copied);
1034         return 0;
1035 }
1036
1037 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1038 {
1039         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1040 }
1041
1042 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1043                                              struct sk_buff *skb,
1044                                              struct be_wrb_params
1045                                              *wrb_params)
1046 {
1047         bool insert_vlan = false;
1048         u16 vlan_tag = 0;
1049
1050         skb = skb_share_check(skb, GFP_ATOMIC);
1051         if (unlikely(!skb))
1052                 return skb;
1053
1054         if (skb_vlan_tag_present(skb)) {
1055                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1056                 insert_vlan = true;
1057         }
1058
1059         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1060                 if (!insert_vlan) {
1061                         vlan_tag = adapter->pvid;
1062                         insert_vlan = true;
1063                 }
1064                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W
1065                  * to skip VLAN insertion.
1066                  */
1067                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1068         }
1069
1070         if (insert_vlan) {
1071                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1072                                                 vlan_tag);
1073                 if (unlikely(!skb))
1074                         return skb;
1075                 __vlan_hwaccel_clear_tag(skb);
1076         }
1077
1078         /* Insert the outer VLAN, if any */
1079         if (adapter->qnq_vid) {
1080                 vlan_tag = adapter->qnq_vid;
1081                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1082                                                 vlan_tag);
1083                 if (unlikely(!skb))
1084                         return skb;
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086         }
1087
1088         return skb;
1089 }
1090
1091 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1092 {
1093         struct ethhdr *eh = (struct ethhdr *)skb->data;
1094         u16 offset = ETH_HLEN;
1095
1096         if (eh->h_proto == htons(ETH_P_IPV6)) {
1097                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1098
1099                 offset += sizeof(struct ipv6hdr);
1100                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1101                     ip6h->nexthdr != NEXTHDR_UDP) {
1102                         struct ipv6_opt_hdr *ehdr =
1103                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1104
1105                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1106                         if (ehdr->hdrlen == 0xff)
1107                                 return true;
1108                 }
1109         }
1110         return false;
1111 }
1112
1113 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1114 {
1115         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1116 }
1117
1118 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119 {
1120         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1121 }
1122
1123 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1124                                                   struct sk_buff *skb,
1125                                                   struct be_wrb_params
1126                                                   *wrb_params)
1127 {
1128         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1129         unsigned int eth_hdr_len;
1130         struct iphdr *ip;
1131
1132         /* For padded packets, BE HW modifies the tot_len field in the IP
1133          * header incorrectly when the VLAN tag is inserted by HW.
1134          * For padded packets, Lancer computes an incorrect checksum.
1135          */
1136         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1137                                                 VLAN_ETH_HLEN : ETH_HLEN;
1138         if (skb->len <= 60 &&
1139             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1140             is_ipv4_pkt(skb)) {
1141                 ip = (struct iphdr *)ip_hdr(skb);
1142                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1143         }
1144
1145         /* If vlan tag is already inlined in the packet, skip HW VLAN
1146          * tagging in pvid-tagging mode
1147          */
1148         if (be_pvid_tagging_enabled(adapter) &&
1149             veh->h_vlan_proto == htons(ETH_P_8021Q))
1150                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152         /* HW has a bug wherein it will calculate CSUM for VLAN
1153          * pkts even though CSUM offload is disabled.
1154          * Manually insert the VLAN in the pkt instead.
1155          */
1156         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157             skb_vlan_tag_present(skb)) {
1158                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159                 if (unlikely(!skb))
1160                         goto err;
1161         }
1162
1163         /* HW may lock up when VLAN HW tagging is requested on
1164          * certain ipv6 packets. Drop such pkts if the HW workaround to
1165          * skip HW tagging is not enabled by FW.
1166          */
1167         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168                      (adapter->pvid || adapter->qnq_vid) &&
1169                      !qnq_async_evt_rcvd(adapter)))
1170                 goto tx_drop;
1171
1172         /* Manual VLAN tag insertion to prevent:
1173          * ASIC lockup when the ASIC inserts VLAN tag into
1174          * certain ipv6 packets. Insert VLAN tags in driver,
1175          * and set event, completion, vlan bits accordingly
1176          * in the Tx WRB.
1177          */
1178         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179             be_vlan_tag_tx_chk(adapter, skb)) {
1180                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181                 if (unlikely(!skb))
1182                         goto err;
1183         }
1184
1185         return skb;
1186 tx_drop:
1187         dev_kfree_skb_any(skb);
1188 err:
1189         return NULL;
1190 }
1191
1192 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193                                            struct sk_buff *skb,
1194                                            struct be_wrb_params *wrb_params)
1195 {
1196         int err;
1197
1198         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1199          * packets that are 32 bytes or less may cause a transmit stall
1200          * on that port. The workaround is to pad such packets
1201          * (len <= 32 bytes) to a minimum length of 36 bytes.
1202          */
1203         if (skb->len <= 32) {
1204                 if (skb_put_padto(skb, 36))
1205                         return NULL;
1206         }
1207
1208         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210                 if (!skb)
1211                         return NULL;
1212         }
1213
1214         /* The stack can send us skbs with length greater than
1215          * what the HW can handle. Trim the extra bytes.
1216          */
1217         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219         WARN_ON(err);
1220
1221         return skb;
1222 }
1223
1224 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225 {
1226         struct be_queue_info *txq = &txo->q;
1227         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229         /* Mark the last request eventable if it hasn't been marked already */
1230         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1234         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235                 wrb_fill_dummy(queue_head_node(txq));
1236                 queue_head_inc(txq);
1237                 atomic_inc(&txq->used);
1238                 txo->pend_wrb_cnt++;
1239                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240                                            TX_HDR_WRB_NUM_SHIFT);
1241                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242                                           TX_HDR_WRB_NUM_SHIFT);
1243         }
1244         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245         txo->pend_wrb_cnt = 0;
1246 }
1247
1248 /* OS2BMC related */
1249
1250 #define DHCP_CLIENT_PORT        68
1251 #define DHCP_SERVER_PORT        67
1252 #define NET_BIOS_PORT1          137
1253 #define NET_BIOS_PORT2          138
1254 #define DHCPV6_RAS_PORT         547
1255
1256 #define is_mc_allowed_on_bmc(adapter, eh)       \
1257         (!is_multicast_filt_enabled(adapter) && \
1258          is_multicast_ether_addr(eh->h_dest) && \
1259          !is_broadcast_ether_addr(eh->h_dest))
1260
1261 #define is_bc_allowed_on_bmc(adapter, eh)       \
1262         (!is_broadcast_filt_enabled(adapter) && \
1263          is_broadcast_ether_addr(eh->h_dest))
1264
1265 #define is_arp_allowed_on_bmc(adapter, skb)     \
1266         (is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1269
1270 #define is_arp_filt_enabled(adapter)    \
1271                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1272
1273 #define is_dhcp_client_filt_enabled(adapter)    \
1274                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1275
1276 #define is_dhcp_srvr_filt_enabled(adapter)      \
1277                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1278
1279 #define is_nbios_filt_enabled(adapter)  \
1280                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1281
1282 #define is_ipv6_na_filt_enabled(adapter)        \
1283                 (adapter->bmc_filt_mask &       \
1284                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1285
1286 #define is_ipv6_ra_filt_enabled(adapter)        \
1287                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1288
1289 #define is_ipv6_ras_filt_enabled(adapter)       \
1290                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1291
1292 #define is_broadcast_filt_enabled(adapter)      \
1293                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1294
1295 #define is_multicast_filt_enabled(adapter)      \
1296                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1297
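/* Decide whether a copy of this TX packet should also be sent to the BMC
 * (OS2BMC). Only frames with a multicast (incl. broadcast) destination are
 * considered: mc/bc frames qualify when the corresponding BMC filter is not
 * enabled, while ARP, DHCP, NetBIOS and IPv6 RA/NA/RAS frames qualify when
 * their filter bit is set in bmc_filt_mask (see the macros above).
 */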
1298 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1299                                struct sk_buff **skb)
1300 {
1301         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1302         bool os2bmc = false;
1303
1304         if (!be_is_os2bmc_enabled(adapter))
1305                 goto done;
1306
1307         if (!is_multicast_ether_addr(eh->h_dest))
1308                 goto done;
1309
1310         if (is_mc_allowed_on_bmc(adapter, eh) ||
1311             is_bc_allowed_on_bmc(adapter, eh) ||
1312             is_arp_allowed_on_bmc(adapter, (*skb))) {
1313                 os2bmc = true;
1314                 goto done;
1315         }
1316
1317         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1318                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1319                 u8 nexthdr = hdr->nexthdr;
1320
1321                 if (nexthdr == IPPROTO_ICMPV6) {
1322                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1323
1324                         switch (icmp6->icmp6_type) {
1325                         case NDISC_ROUTER_ADVERTISEMENT:
1326                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1327                                 goto done;
1328                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1329                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1330                                 goto done;
1331                         default:
1332                                 break;
1333                         }
1334                 }
1335         }
1336
1337         if (is_udp_pkt((*skb))) {
1338                 struct udphdr *udp = udp_hdr((*skb));
1339
1340                 switch (ntohs(udp->dest)) {
1341                 case DHCP_CLIENT_PORT:
1342                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1343                         goto done;
1344                 case DHCP_SERVER_PORT:
1345                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1346                         goto done;
1347                 case NET_BIOS_PORT1:
1348                 case NET_BIOS_PORT2:
1349                         os2bmc = is_nbios_filt_enabled(adapter);
1350                         goto done;
1351                 case DHCPV6_RAS_PORT:
1352                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1353                         goto done;
1354                 default:
1355                         break;
1356                 }
1357         }
1358 done:
1359         /* For VLAN packets destined to the BMC, the ASIC expects
1360          * the VLAN tag to be inline in the packet.
1361          */
1362         if (os2bmc)
1363                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1364
1365         return os2bmc;
1366 }
1367
1368 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1369 {
1370         struct be_adapter *adapter = netdev_priv(netdev);
1371         u16 q_idx = skb_get_queue_mapping(skb);
1372         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1373         struct be_wrb_params wrb_params = { 0 };
1374         bool flush = !netdev_xmit_more();
1375         u16 wrb_cnt;
1376
1377         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1378         if (unlikely(!skb))
1379                 goto drop;
1380
1381         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1382
1383         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384         if (unlikely(!wrb_cnt)) {
1385                 dev_kfree_skb_any(skb);
1386                 goto drop;
1387         }
1388
1389         /* if os2bmc is enabled and if the pkt is destined to bmc,
1390          * enqueue the pkt a 2nd time with mgmt bit set.
1391          */
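        /* be_send_pkt_to_bmc() may insert the VLAN tag inline into the skb;
         * if the pkt is to be mirrored to the BMC, the same skb is enqueued a
         * second time, so take an extra reference (skb_get()) as each TX
         * completion drops one reference.
         */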
1392         if (be_send_pkt_to_bmc(adapter, &skb)) {
1393                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1394                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1395                 if (unlikely(!wrb_cnt))
1396                         goto drop;
1397                 else
1398                         skb_get(skb);
1399         }
1400
1401         if (be_is_txq_full(txo)) {
1402                 netif_stop_subqueue(netdev, q_idx);
1403                 tx_stats(txo)->tx_stops++;
1404         }
1405
1406         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1407                 be_xmit_flush(adapter, txo);
1408
1409         return NETDEV_TX_OK;
1410 drop:
1411         tx_stats(txo)->tx_drv_drops++;
1412         /* Flush the already enqueued tx requests */
1413         if (flush && txo->pend_wrb_cnt)
1414                 be_xmit_flush(adapter, txo);
1415
1416         return NETDEV_TX_OK;
1417 }
1418
1419 static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1420 {
1421         struct be_adapter *adapter = netdev_priv(netdev);
1422         struct device *dev = &adapter->pdev->dev;
1423         struct be_tx_obj *txo;
1424         struct sk_buff *skb;
1425         struct tcphdr *tcphdr;
1426         struct udphdr *udphdr;
1427         u32 *entry;
1428         int status;
1429         int i, j;
1430
1431         for_all_tx_queues(adapter, txo, i) {
1432                 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1433                          i, txo->q.head, txo->q.tail,
1434                          atomic_read(&txo->q.used), txo->q.id);
1435
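                /* Dump only the non-zero descriptors; each TXQ WRB and each
                 * TX CQ entry is 16 bytes (4 dwords), hence the stride of 4.
                 */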
1436                 entry = txo->q.dma_mem.va;
1437                 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1438                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1439                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1440                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1441                                          j, entry[j], entry[j + 1],
1442                                          entry[j + 2], entry[j + 3]);
1443                         }
1444                 }
1445
1446                 entry = txo->cq.dma_mem.va;
1447                 dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1448                          i, txo->cq.head, txo->cq.tail,
1449                          atomic_read(&txo->cq.used));
1450                 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1451                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1452                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1453                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1454                                          j, entry[j], entry[j + 1],
1455                                          entry[j + 2], entry[j + 3]);
1456                         }
1457                 }
1458
1459                 for (j = 0; j < TX_Q_LEN; j++) {
1460                         if (txo->sent_skb_list[j]) {
1461                                 skb = txo->sent_skb_list[j];
1462                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1463                                         tcphdr = tcp_hdr(skb);
1464                                         dev_info(dev, "TCP source port %d\n",
1465                                                  ntohs(tcphdr->source));
1466                                         dev_info(dev, "TCP dest port %d\n",
1467                                                  ntohs(tcphdr->dest));
1468                                         dev_info(dev, "TCP sequence num %u\n",
1469                                                  ntohl(tcphdr->seq));
1470                                         dev_info(dev, "TCP ack_seq %u\n",
1471                                                  ntohl(tcphdr->ack_seq));
1472                                 } else if (ip_hdr(skb)->protocol ==
1473                                            IPPROTO_UDP) {
1474                                         udphdr = udp_hdr(skb);
1475                                         dev_info(dev, "UDP source port %d\n",
1476                                                  ntohs(udphdr->source));
1477                                         dev_info(dev, "UDP dest port %d\n",
1478                                                  ntohs(udphdr->dest));
1479                                 }
1480                                 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1481                                          j, skb, skb->len, skb->protocol);
1482                         }
1483                 }
1484         }
1485
1486         if (lancer_chip(adapter)) {
1487                 dev_info(dev, "Initiating reset due to tx timeout\n");
1488                 dev_info(dev, "Resetting adapter\n");
1489                 status = lancer_physdev_ctrl(adapter,
1490                                              PHYSDEV_CONTROL_FW_RESET_MASK);
1491                 if (status)
1492                         dev_err(dev, "Reset failed; reboot the server\n");
1493         }
1494 }
1495
1496 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1497 {
1498         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1499                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1500 }
1501
1502 static int be_set_vlan_promisc(struct be_adapter *adapter)
1503 {
1504         struct device *dev = &adapter->pdev->dev;
1505         int status;
1506
1507         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1508                 return 0;
1509
1510         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1511         if (!status) {
1512                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1513                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1514         } else {
1515                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1516         }
1517         return status;
1518 }
1519
1520 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1521 {
1522         struct device *dev = &adapter->pdev->dev;
1523         int status;
1524
1525         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1526         if (!status) {
1527                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1528                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1529         }
1530         return status;
1531 }
1532
1533 /*
1534  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1535  * If the user configures more, place BE in vlan promiscuous mode.
1536  */
1537 static int be_vid_config(struct be_adapter *adapter)
1538 {
1539         struct device *dev = &adapter->pdev->dev;
1540         u16 vids[BE_NUM_VLANS_SUPPORTED];
1541         u16 num = 0, i = 0;
1542         int status = 0;
1543
1544         /* No need to change the VLAN state if the I/F is in promiscuous */
1545         if (adapter->netdev->flags & IFF_PROMISC)
1546                 return 0;
1547
1548         if (adapter->vlans_added > be_max_vlans(adapter))
1549                 return be_set_vlan_promisc(adapter);
1550
1551         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1552                 status = be_clear_vlan_promisc(adapter);
1553                 if (status)
1554                         return status;
1555         }
1556         /* Construct VLAN Table to give to HW */
1557         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1558                 vids[num++] = cpu_to_le16(i);
1559
1560         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1561         if (status) {
1562                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1563                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1564                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1565                     addl_status(status) ==
1566                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1567                         return be_set_vlan_promisc(adapter);
1568         }
1569         return status;
1570 }
1571
1572 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1573 {
1574         struct be_adapter *adapter = netdev_priv(netdev);
1575         int status = 0;
1576
1577         mutex_lock(&adapter->rx_filter_lock);
1578
1579         /* Packets with VID 0 are always received by Lancer by default */
1580         if (lancer_chip(adapter) && vid == 0)
1581                 goto done;
1582
1583         if (test_bit(vid, adapter->vids))
1584                 goto done;
1585
1586         set_bit(vid, adapter->vids);
1587         adapter->vlans_added++;
1588
1589         status = be_vid_config(adapter);
1590 done:
1591         mutex_unlock(&adapter->rx_filter_lock);
1592         return status;
1593 }
1594
1595 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1596 {
1597         struct be_adapter *adapter = netdev_priv(netdev);
1598         int status = 0;
1599
1600         mutex_lock(&adapter->rx_filter_lock);
1601
1602         /* Packets with VID 0 are always received by Lancer by default */
1603         if (lancer_chip(adapter) && vid == 0)
1604                 goto done;
1605
1606         if (!test_bit(vid, adapter->vids))
1607                 goto done;
1608
1609         clear_bit(vid, adapter->vids);
1610         adapter->vlans_added--;
1611
1612         status = be_vid_config(adapter);
1613 done:
1614         mutex_unlock(&adapter->rx_filter_lock);
1615         return status;
1616 }
1617
1618 static void be_set_all_promisc(struct be_adapter *adapter)
1619 {
1620         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1621         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1622 }
1623
1624 static void be_set_mc_promisc(struct be_adapter *adapter)
1625 {
1626         int status;
1627
1628         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1629                 return;
1630
1631         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1632         if (!status)
1633                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1634 }
1635
1636 static void be_set_uc_promisc(struct be_adapter *adapter)
1637 {
1638         int status;
1639
1640         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1641                 return;
1642
1643         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1644         if (!status)
1645                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1646 }
1647
1648 static void be_clear_uc_promisc(struct be_adapter *adapter)
1649 {
1650         int status;
1651
1652         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1653                 return;
1654
1655         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1656         if (!status)
1657                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1658 }
1659
1660 /* The two functions below are the sync/unsync callbacks for __dev_mc_sync()
1661  * and __dev_uc_sync(). We use a single callback for both sync and unsync; we
1662  * don't really add/remove addresses through it, but only use it to detect
1663  * changes to the uc/mc lists. The entire list is programmed in be_set_rx_mode().
1664  */
1665 static int be_uc_list_update(struct net_device *netdev,
1666                              const unsigned char *addr)
1667 {
1668         struct be_adapter *adapter = netdev_priv(netdev);
1669
1670         adapter->update_uc_list = true;
1671         return 0;
1672 }
1673
1674 static int be_mc_list_update(struct net_device *netdev,
1675                              const unsigned char *addr)
1676 {
1677         struct be_adapter *adapter = netdev_priv(netdev);
1678
1679         adapter->update_mc_list = true;
1680         return 0;
1681 }
1682
1683 static void be_set_mc_list(struct be_adapter *adapter)
1684 {
1685         struct net_device *netdev = adapter->netdev;
1686         struct netdev_hw_addr *ha;
1687         bool mc_promisc = false;
1688         int status;
1689
1690         netif_addr_lock_bh(netdev);
1691         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1692
1693         if (netdev->flags & IFF_PROMISC) {
1694                 adapter->update_mc_list = false;
1695         } else if (netdev->flags & IFF_ALLMULTI ||
1696                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1697                 /* Enable multicast promisc if num configured exceeds
1698                  * what we support
1699                  */
1700                 mc_promisc = true;
1701                 adapter->update_mc_list = false;
1702         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1703                 /* Update mc-list unconditionally if the iface was previously
1704                  * in mc-promisc mode and now is out of that mode.
1705                  */
1706                 adapter->update_mc_list = true;
1707         }
1708
1709         if (adapter->update_mc_list) {
1710                 int i = 0;
1711
1712                 /* cache the mc-list in adapter */
1713                 netdev_for_each_mc_addr(ha, netdev) {
1714                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1715                         i++;
1716                 }
1717                 adapter->mc_count = netdev_mc_count(netdev);
1718         }
1719         netif_addr_unlock_bh(netdev);
1720
1721         if (mc_promisc) {
1722                 be_set_mc_promisc(adapter);
1723         } else if (adapter->update_mc_list) {
1724                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1725                 if (!status)
1726                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1727                 else
1728                         be_set_mc_promisc(adapter);
1729
1730                 adapter->update_mc_list = false;
1731         }
1732 }
1733
1734 static void be_clear_mc_list(struct be_adapter *adapter)
1735 {
1736         struct net_device *netdev = adapter->netdev;
1737
1738         __dev_mc_unsync(netdev, NULL);
1739         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1740         adapter->mc_count = 0;
1741 }
1742
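/* If the uc-list entry is the interface's own MAC address, reuse the
 * already-programmed pmac_id[0] instead of adding a duplicate entry with
 * be_cmd_pmac_add().
 */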
1743 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1744 {
1745         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1746                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1747                 return 0;
1748         }
1749
1750         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1751                                adapter->if_handle,
1752                                &adapter->pmac_id[uc_idx + 1], 0);
1753 }
1754
1755 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1756 {
1757         if (pmac_id == adapter->pmac_id[0])
1758                 return;
1759
1760         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1761 }
1762
1763 static void be_set_uc_list(struct be_adapter *adapter)
1764 {
1765         struct net_device *netdev = adapter->netdev;
1766         struct netdev_hw_addr *ha;
1767         bool uc_promisc = false;
1768         int curr_uc_macs = 0, i;
1769
1770         netif_addr_lock_bh(netdev);
1771         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1772
1773         if (netdev->flags & IFF_PROMISC) {
1774                 adapter->update_uc_list = false;
1775         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1776                 uc_promisc = true;
1777                 adapter->update_uc_list = false;
1778         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1779                 /* Update uc-list unconditionally if the iface was previously
1780                  * in uc-promisc mode and now is out of that mode.
1781                  */
1782                 adapter->update_uc_list = true;
1783         }
1784
1785         if (adapter->update_uc_list) {
1786                 /* cache the uc-list in adapter array */
1787                 i = 0;
1788                 netdev_for_each_uc_addr(ha, netdev) {
1789                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1790                         i++;
1791                 }
1792                 curr_uc_macs = netdev_uc_count(netdev);
1793         }
1794         netif_addr_unlock_bh(netdev);
1795
1796         if (uc_promisc) {
1797                 be_set_uc_promisc(adapter);
1798         } else if (adapter->update_uc_list) {
1799                 be_clear_uc_promisc(adapter);
1800
1801                 for (i = 0; i < adapter->uc_macs; i++)
1802                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1803
1804                 for (i = 0; i < curr_uc_macs; i++)
1805                         be_uc_mac_add(adapter, i);
1806                 adapter->uc_macs = curr_uc_macs;
1807                 adapter->update_uc_list = false;
1808         }
1809 }
1810
1811 static void be_clear_uc_list(struct be_adapter *adapter)
1812 {
1813         struct net_device *netdev = adapter->netdev;
1814         int i;
1815
1816         __dev_uc_unsync(netdev, NULL);
1817         for (i = 0; i < adapter->uc_macs; i++)
1818                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1819
1820         adapter->uc_macs = 0;
1821 }
1822
1823 static void __be_set_rx_mode(struct be_adapter *adapter)
1824 {
1825         struct net_device *netdev = adapter->netdev;
1826
1827         mutex_lock(&adapter->rx_filter_lock);
1828
1829         if (netdev->flags & IFF_PROMISC) {
1830                 if (!be_in_all_promisc(adapter))
1831                         be_set_all_promisc(adapter);
1832         } else if (be_in_all_promisc(adapter)) {
1833                 /* We need to re-program the vlan-list or clear
1834                  * vlan-promisc mode (if needed) when the interface
1835                  * comes out of promisc mode.
1836                  */
1837                 be_vid_config(adapter);
1838         }
1839
1840         be_set_uc_list(adapter);
1841         be_set_mc_list(adapter);
1842
1843         mutex_unlock(&adapter->rx_filter_lock);
1844 }
1845
1846 static void be_work_set_rx_mode(struct work_struct *work)
1847 {
1848         struct be_cmd_work *cmd_work =
1849                                 container_of(work, struct be_cmd_work, work);
1850
1851         __be_set_rx_mode(cmd_work->adapter);
1852         kfree(cmd_work);
1853 }
1854
1855 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1856 {
1857         struct be_adapter *adapter = netdev_priv(netdev);
1858         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1859         int status;
1860
1861         if (!sriov_enabled(adapter))
1862                 return -EPERM;
1863
1864         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1865                 return -EINVAL;
1866
1867         /* Proceed further only if user provided MAC is different
1868          * from active MAC
1869          */
1870         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1871                 return 0;
1872
1873         if (BEx_chip(adapter)) {
1874                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1875                                 vf + 1);
1876
1877                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1878                                          &vf_cfg->pmac_id, vf + 1);
1879         } else {
1880                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1881                                         vf + 1);
1882         }
1883
1884         if (status) {
1885                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1886                         mac, vf, status);
1887                 return be_cmd_status(status);
1888         }
1889
1890         ether_addr_copy(vf_cfg->mac_addr, mac);
1891
1892         return 0;
1893 }
1894
1895 static int be_get_vf_config(struct net_device *netdev, int vf,
1896                             struct ifla_vf_info *vi)
1897 {
1898         struct be_adapter *adapter = netdev_priv(netdev);
1899         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900
1901         if (!sriov_enabled(adapter))
1902                 return -EPERM;
1903
1904         if (vf >= adapter->num_vfs)
1905                 return -EINVAL;
1906
1907         vi->vf = vf;
1908         vi->max_tx_rate = vf_cfg->tx_rate;
1909         vi->min_tx_rate = 0;
1910         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1911         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1912         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1913         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1914         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1915
1916         return 0;
1917 }
1918
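/* Transparent VLAN Tagging (TVT): the hardware inserts the given VLAN tag on
 * all traffic from the VF, so any VLAN filters the VF may have programmed are
 * cleared and the VF loses the FILTMGMT privilege below.
 */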
1919 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1920 {
1921         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1922         u16 vids[BE_NUM_VLANS_SUPPORTED];
1923         int vf_if_id = vf_cfg->if_handle;
1924         int status;
1925
1926         /* Enable Transparent VLAN Tagging */
1927         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1928         if (status)
1929                 return status;
1930
1931         /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1932         vids[0] = 0;
1933         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1934         if (!status)
1935                 dev_info(&adapter->pdev->dev,
1936                          "Cleared guest VLANs on VF%d\n", vf);
1937
1938         /* After TVT is enabled, disallow VFs to program VLAN filters */
1939         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1940                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1941                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1942                 if (!status)
1943                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1944         }
1945         return 0;
1946 }
1947
1948 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1949 {
1950         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1951         struct device *dev = &adapter->pdev->dev;
1952         int status;
1953
1954         /* Reset Transparent VLAN Tagging. */
1955         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1956                                        vf_cfg->if_handle, 0, 0);
1957         if (status)
1958                 return status;
1959
1960         /* Allow VFs to program VLAN filtering */
1961         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1962                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1963                                                   BE_PRIV_FILTMGMT, vf + 1);
1964                 if (!status) {
1965                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1966                         dev_info(dev, "VF%d: FILTMGMT priv enabled\n", vf);
1967                 }
1968         }
1969
1970         dev_info(dev,
1971                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag\n");
1972         return 0;
1973 }
1974
1975 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1976                           __be16 vlan_proto)
1977 {
1978         struct be_adapter *adapter = netdev_priv(netdev);
1979         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1980         int status;
1981
1982         if (!sriov_enabled(adapter))
1983                 return -EPERM;
1984
1985         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1986                 return -EINVAL;
1987
1988         if (vlan_proto != htons(ETH_P_8021Q))
1989                 return -EPROTONOSUPPORT;
1990
1991         if (vlan || qos) {
1992                 vlan |= qos << VLAN_PRIO_SHIFT;
1993                 status = be_set_vf_tvt(adapter, vf, vlan);
1994         } else {
1995                 status = be_clear_vf_tvt(adapter, vf);
1996         }
1997
1998         if (status) {
1999                 dev_err(&adapter->pdev->dev,
2000                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2001                         status);
2002                 return be_cmd_status(status);
2003         }
2004
2005         vf_cfg->vlan_tag = vlan;
2006         return 0;
2007 }
2008
2009 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2010                              int min_tx_rate, int max_tx_rate)
2011 {
2012         struct be_adapter *adapter = netdev_priv(netdev);
2013         struct device *dev = &adapter->pdev->dev;
2014         int percent_rate, status = 0;
2015         u16 link_speed = 0;
2016         u8 link_status;
2017
2018         if (!sriov_enabled(adapter))
2019                 return -EPERM;
2020
2021         if (vf >= adapter->num_vfs)
2022                 return -EINVAL;
2023
2024         if (min_tx_rate)
2025                 return -EINVAL;
2026
2027         if (!max_tx_rate)
2028                 goto config_qos;
2029
2030         status = be_cmd_link_status_query(adapter, &link_speed,
2031                                           &link_status, 0);
2032         if (status)
2033                 goto err;
2034
2035         if (!link_status) {
2036                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2037                 status = -ENETDOWN;
2038                 goto err;
2039         }
2040
2041         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2042                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2043                         link_speed);
2044                 status = -EINVAL;
2045                 goto err;
2046         }
2047
2048         /* On Skyhawk the QOS setting must be done only as a % value */
2049         percent_rate = link_speed / 100;
2050         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2051                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2052                         percent_rate);
2053                 status = -EINVAL;
2054                 goto err;
2055         }
2056
2057 config_qos:
2058         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2059         if (status)
2060                 goto err;
2061
2062         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2063         return 0;
2064
2065 err:
2066         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2067                 max_tx_rate, vf);
2068         return be_cmd_status(status);
2069 }
2070
2071 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2072                                 int link_state)
2073 {
2074         struct be_adapter *adapter = netdev_priv(netdev);
2075         int status;
2076
2077         if (!sriov_enabled(adapter))
2078                 return -EPERM;
2079
2080         if (vf >= adapter->num_vfs)
2081                 return -EINVAL;
2082
2083         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2084         if (status) {
2085                 dev_err(&adapter->pdev->dev,
2086                         "Link state change on VF %d failed: %#x\n", vf, status);
2087                 return be_cmd_status(status);
2088         }
2089
2090         adapter->vf_cfg[vf].plink_tracking = link_state;
2091
2092         return 0;
2093 }
2094
2095 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2096 {
2097         struct be_adapter *adapter = netdev_priv(netdev);
2098         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2099         u8 spoofchk;
2100         int status;
2101
2102         if (!sriov_enabled(adapter))
2103                 return -EPERM;
2104
2105         if (vf >= adapter->num_vfs)
2106                 return -EINVAL;
2107
2108         if (BEx_chip(adapter))
2109                 return -EOPNOTSUPP;
2110
2111         if (enable == vf_cfg->spoofchk)
2112                 return 0;
2113
2114         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2115
2116         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2117                                        0, spoofchk);
2118         if (status) {
2119                 dev_err(&adapter->pdev->dev,
2120                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2121                 return be_cmd_status(status);
2122         }
2123
2124         vf_cfg->spoofchk = enable;
2125         return 0;
2126 }
2127
2128 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2129                           ulong now)
2130 {
2131         aic->rx_pkts_prev = rx_pkts;
2132         aic->tx_reqs_prev = tx_pkts;
2133         aic->jiffies = now;
2134 }
2135
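/* Adaptive interrupt coalescing: derive a new event-queue delay (eqd) for
 * this EQ from the aggregate RX/TX packet rate observed since the previous
 * sample.
 */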
2136 static int be_get_new_eqd(struct be_eq_obj *eqo)
2137 {
2138         struct be_adapter *adapter = eqo->adapter;
2139         int eqd, start;
2140         struct be_aic_obj *aic;
2141         struct be_rx_obj *rxo;
2142         struct be_tx_obj *txo;
2143         u64 rx_pkts = 0, tx_pkts = 0;
2144         ulong now;
2145         u32 pps, delta;
2146         int i;
2147
2148         aic = &adapter->aic_obj[eqo->idx];
2149         if (!adapter->aic_enabled) {
2150                 if (aic->jiffies)
2151                         aic->jiffies = 0;
2152                 eqd = aic->et_eqd;
2153                 return eqd;
2154         }
2155
2156         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2157                 do {
2158                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2159                         rx_pkts += rxo->stats.rx_pkts;
2160                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2161         }
2162
2163         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2164                 do {
2165                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2166                         tx_pkts += txo->stats.tx_reqs;
2167                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2168         }
2169
2170         /* Skip if counters wrapped around or this is the first calculation */
2171         now = jiffies;
2172         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2173             rx_pkts < aic->rx_pkts_prev ||
2174             tx_pkts < aic->tx_reqs_prev) {
2175                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2176                 return aic->prev_eqd;
2177         }
2178
2179         delta = jiffies_to_msecs(now - aic->jiffies);
2180         if (delta == 0)
2181                 return aic->prev_eqd;
2182
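        /* Heuristic: convert the packet deltas to an aggregate packets-per-
         * second rate and scale it to a delay value (roughly 4 units per
         * 15K pps), rounding small values down to 0 and clamping the result
         * to the [min_eqd, max_eqd] range.
         */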
2183         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2184                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2185         eqd = (pps / 15000) << 2;
2186
2187         if (eqd < 8)
2188                 eqd = 0;
2189         eqd = min_t(u32, eqd, aic->max_eqd);
2190         eqd = max_t(u32, eqd, aic->min_eqd);
2191
2192         be_aic_update(aic, rx_pkts, tx_pkts, now);
2193
2194         return eqd;
2195 }
2196
2197 /* For Skyhawk-R only */
2198 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2199 {
2200         struct be_adapter *adapter = eqo->adapter;
2201         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2202         ulong now = jiffies;
2203         int eqd;
2204         u32 mult_enc;
2205
2206         if (!adapter->aic_enabled)
2207                 return 0;
2208
2209         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2210                 eqd = aic->prev_eqd;
2211         else
2212                 eqd = be_get_new_eqd(eqo);
2213
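        /* Map the delay computed above to one of the coarse R2I delay
         * multiplier encodings understood by Skyhawk-R.
         */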
2214         if (eqd > 100)
2215                 mult_enc = R2I_DLY_ENC_1;
2216         else if (eqd > 60)
2217                 mult_enc = R2I_DLY_ENC_2;
2218         else if (eqd > 20)
2219                 mult_enc = R2I_DLY_ENC_3;
2220         else
2221                 mult_enc = R2I_DLY_ENC_0;
2222
2223         aic->prev_eqd = eqd;
2224
2225         return mult_enc;
2226 }
2227
2228 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2229 {
2230         struct be_set_eqd set_eqd[MAX_EVT_QS];
2231         struct be_aic_obj *aic;
2232         struct be_eq_obj *eqo;
2233         int i, num = 0, eqd;
2234
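        /* Recompute the delay for every EQ and batch an EQ-delay update
         * (be_cmd_modify_eqd()) containing only the EQs whose delay changed,
         * or all of them when force_update is set.
         */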
2235         for_all_evt_queues(adapter, eqo, i) {
2236                 aic = &adapter->aic_obj[eqo->idx];
2237                 eqd = be_get_new_eqd(eqo);
2238                 if (force_update || eqd != aic->prev_eqd) {
2239                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2240                         set_eqd[num].eq_id = eqo->q.id;
2241                         aic->prev_eqd = eqd;
2242                         num++;
2243                 }
2244         }
2245
2246         if (num)
2247                 be_cmd_modify_eqd(adapter, set_eqd, num);
2248 }
2249
2250 static void be_rx_stats_update(struct be_rx_obj *rxo,
2251                                struct be_rx_compl_info *rxcp)
2252 {
2253         struct be_rx_stats *stats = rx_stats(rxo);
2254
2255         u64_stats_update_begin(&stats->sync);
2256         stats->rx_compl++;
2257         stats->rx_bytes += rxcp->pkt_size;
2258         stats->rx_pkts++;
2259         if (rxcp->tunneled)
2260                 stats->rx_vxlan_offload_pkts++;
2261         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2262                 stats->rx_mcast_pkts++;
2263         if (rxcp->err)
2264                 stats->rx_compl_err++;
2265         u64_stats_update_end(&stats->sync);
2266 }
2267
2268 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2269 {
2270         /* L4 checksum is not reliable for non TCP/UDP packets.
2271          * Also ignore ipcksm for ipv6 pkts
2272          */
2273         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2274                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2275 }
2276
2277 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2278 {
2279         struct be_adapter *adapter = rxo->adapter;
2280         struct be_rx_page_info *rx_page_info;
2281         struct be_queue_info *rxq = &rxo->q;
2282         u32 frag_idx = rxq->tail;
2283
2284         rx_page_info = &rxo->page_info_tbl[frag_idx];
2285         BUG_ON(!rx_page_info->page);
2286
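        /* The big page backing this fragment was DMA-mapped once as a whole;
         * unmap it only when its last fragment is consumed, otherwise just
         * sync this fragment for CPU access.
         */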
2287         if (rx_page_info->last_frag) {
2288                 dma_unmap_page(&adapter->pdev->dev,
2289                                dma_unmap_addr(rx_page_info, bus),
2290                                adapter->big_page_size, DMA_FROM_DEVICE);
2291                 rx_page_info->last_frag = false;
2292         } else {
2293                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2294                                         dma_unmap_addr(rx_page_info, bus),
2295                                         rx_frag_size, DMA_FROM_DEVICE);
2296         }
2297
2298         queue_tail_inc(rxq);
2299         atomic_dec(&rxq->used);
2300         return rx_page_info;
2301 }
2302
2303 /* Throw away the data in the Rx completion */
2304 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2305                                 struct be_rx_compl_info *rxcp)
2306 {
2307         struct be_rx_page_info *page_info;
2308         u16 i, num_rcvd = rxcp->num_rcvd;
2309
2310         for (i = 0; i < num_rcvd; i++) {
2311                 page_info = get_rx_page_info(rxo);
2312                 put_page(page_info->page);
2313                 memset(page_info, 0, sizeof(*page_info));
2314         }
2315 }
2316
2317 /*
2318  * skb_fill_rx_data forms a complete skb for an ether frame
2319  * indicated by rxcp.
2320  */
2321 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2322                              struct be_rx_compl_info *rxcp)
2323 {
2324         struct be_rx_page_info *page_info;
2325         u16 i, j;
2326         u16 hdr_len, curr_frag_len, remaining;
2327         u8 *start;
2328
2329         page_info = get_rx_page_info(rxo);
2330         start = page_address(page_info->page) + page_info->page_offset;
2331         prefetch(start);
2332
2333         /* Copy data in the first descriptor of this completion */
2334         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2335
2336         skb->len = curr_frag_len;
2337         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2338                 memcpy(skb->data, start, curr_frag_len);
2339                 /* Complete packet has now been moved to data */
2340                 put_page(page_info->page);
2341                 skb->data_len = 0;
2342                 skb->tail += curr_frag_len;
2343         } else {
2344                 hdr_len = ETH_HLEN;
2345                 memcpy(skb->data, start, hdr_len);
2346                 skb_shinfo(skb)->nr_frags = 1;
2347                 skb_frag_set_page(skb, 0, page_info->page);
2348                 skb_frag_off_set(&skb_shinfo(skb)->frags[0],
2349                                  page_info->page_offset + hdr_len);
2350                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2351                                   curr_frag_len - hdr_len);
2352                 skb->data_len = curr_frag_len - hdr_len;
2353                 skb->truesize += rx_frag_size;
2354                 skb->tail += hdr_len;
2355         }
2356         page_info->page = NULL;
2357
2358         if (rxcp->pkt_size <= rx_frag_size) {
2359                 BUG_ON(rxcp->num_rcvd != 1);
2360                 return;
2361         }
2362
2363         /* More frags present for this completion */
2364         remaining = rxcp->pkt_size - curr_frag_len;
2365         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2366                 page_info = get_rx_page_info(rxo);
2367                 curr_frag_len = min(remaining, rx_frag_size);
2368
2369                 /* Coalesce all frags from the same physical page in one slot */
2370                 if (page_info->page_offset == 0) {
2371                         /* Fresh page */
2372                         j++;
2373                         skb_frag_set_page(skb, j, page_info->page);
2374                         skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2375                                          page_info->page_offset);
2376                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2377                         skb_shinfo(skb)->nr_frags++;
2378                 } else {
2379                         put_page(page_info->page);
2380                 }
2381
2382                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383                 skb->len += curr_frag_len;
2384                 skb->data_len += curr_frag_len;
2385                 skb->truesize += rx_frag_size;
2386                 remaining -= curr_frag_len;
2387                 page_info->page = NULL;
2388         }
2389         BUG_ON(j > MAX_SKB_FRAGS);
2390 }
2391
2392 /* Process the RX completion indicated by rxcp when GRO is disabled */
2393 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2394                                 struct be_rx_compl_info *rxcp)
2395 {
2396         struct be_adapter *adapter = rxo->adapter;
2397         struct net_device *netdev = adapter->netdev;
2398         struct sk_buff *skb;
2399
2400         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2401         if (unlikely(!skb)) {
2402                 rx_stats(rxo)->rx_drops_no_skbs++;
2403                 be_rx_compl_discard(rxo, rxcp);
2404                 return;
2405         }
2406
2407         skb_fill_rx_data(rxo, skb, rxcp);
2408
2409         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2410                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2411         else
2412                 skb_checksum_none_assert(skb);
2413
2414         skb->protocol = eth_type_trans(skb, netdev);
2415         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2416         if (netdev->features & NETIF_F_RXHASH)
2417                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2418
2419         skb->csum_level = rxcp->tunneled;
2420         skb_mark_napi_id(skb, napi);
2421
2422         if (rxcp->vlanf)
2423                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2424
2425         netif_receive_skb(skb);
2426 }
2427
2428 /* Process the RX completion indicated by rxcp when GRO is enabled */
2429 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2430                                     struct napi_struct *napi,
2431                                     struct be_rx_compl_info *rxcp)
2432 {
2433         struct be_adapter *adapter = rxo->adapter;
2434         struct be_rx_page_info *page_info;
2435         struct sk_buff *skb = NULL;
2436         u16 remaining, curr_frag_len;
2437         u16 i, j;
2438
2439         skb = napi_get_frags(napi);
2440         if (!skb) {
2441                 be_rx_compl_discard(rxo, rxcp);
2442                 return;
2443         }
2444
2445         remaining = rxcp->pkt_size;
2446         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2447                 page_info = get_rx_page_info(rxo);
2448
2449                 curr_frag_len = min(remaining, rx_frag_size);
2450
2451                 /* Coalesce all frags from the same physical page in one slot */
2452                 if (i == 0 || page_info->page_offset == 0) {
2453                         /* First frag or Fresh page */
2454                         j++;
2455                         skb_frag_set_page(skb, j, page_info->page);
2456                         skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2457                                          page_info->page_offset);
2458                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2459                 } else {
2460                         put_page(page_info->page);
2461                 }
2462                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2463                 skb->truesize += rx_frag_size;
2464                 remaining -= curr_frag_len;
2465                 memset(page_info, 0, sizeof(*page_info));
2466         }
2467         BUG_ON(j > MAX_SKB_FRAGS);
2468
2469         skb_shinfo(skb)->nr_frags = j + 1;
2470         skb->len = rxcp->pkt_size;
2471         skb->data_len = rxcp->pkt_size;
2472         skb->ip_summed = CHECKSUM_UNNECESSARY;
2473         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2474         if (adapter->netdev->features & NETIF_F_RXHASH)
2475                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2476
2477         skb->csum_level = rxcp->tunneled;
2478
2479         if (rxcp->vlanf)
2480                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2481
2482         napi_gro_frags(napi);
2483 }
2484
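/* Parse a v1 RX completion; this format is used when the adapter runs in
 * BE3-native mode (see be_rx_compl_get()).
 */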
2485 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2486                                  struct be_rx_compl_info *rxcp)
2487 {
2488         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2489         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2490         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2491         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2492         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2493         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2494         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2495         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2496         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2497         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2498         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2499         if (rxcp->vlanf) {
2500                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2501                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2502         }
2503         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2504         rxcp->tunneled =
2505                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2506 }
2507
2508 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2509                                  struct be_rx_compl_info *rxcp)
2510 {
2511         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2512         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2513         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2514         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2515         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2516         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2517         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2518         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2519         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2520         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2521         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2522         if (rxcp->vlanf) {
2523                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2524                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2525         }
2526         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2527         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2528 }
2529
2530 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2531 {
2532         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2533         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2534         struct be_adapter *adapter = rxo->adapter;
2535
2536         /* For checking the valid bit it is Ok to use either definition as the
2537          * valid bit is at the same position in both v0 and v1 Rx compl */
2538         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2539                 return NULL;
2540
2541         rmb();
2542         be_dws_le_to_cpu(compl, sizeof(*compl));
2543
2544         if (adapter->be3_native)
2545                 be_parse_rx_compl_v1(compl, rxcp);
2546         else
2547                 be_parse_rx_compl_v0(compl, rxcp);
2548
2549         if (rxcp->ip_frag)
2550                 rxcp->l4_csum = 0;
2551
2552         if (rxcp->vlanf) {
2553                 /* In QNQ modes, if qnq bit is not set, then the packet was
2554                  * tagged only with the transparent outer vlan-tag and must
2555                  * not be treated as a vlan packet by host
2556                  */
2557                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2558                         rxcp->vlanf = 0;
2559
2560                 if (!lancer_chip(adapter))
2561                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2562
2563                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2564                     !test_bit(rxcp->vlan_tag, adapter->vids))
2565                         rxcp->vlanf = 0;
2566         }
2567
2568         /* As the compl has been parsed, reset it; we won't touch it again */
2569         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2570
2571         queue_tail_inc(&rxo->cq);
2572         return rxcp;
2573 }
2574
2575 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2576 {
2577         u32 order = get_order(size);
2578
2579         if (order > 0)
2580                 gfp |= __GFP_COMP;
2581         return  alloc_pages(gfp, order);
2582 }
2583
2584 /*
2585  * Allocate a page, split it into fragments of size rx_frag_size and post
2586  * them as receive buffers to BE
2587  */
2588 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2589 {
2590         struct be_adapter *adapter = rxo->adapter;
2591         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2592         struct be_queue_info *rxq = &rxo->q;
2593         struct page *pagep = NULL;
2594         struct device *dev = &adapter->pdev->dev;
2595         struct be_eth_rx_d *rxd;
2596         u64 page_dmaaddr = 0, frag_dmaaddr;
2597         u32 posted, page_offset = 0, notify = 0;
2598
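        /* One "big page" is DMA-mapped at a time and carved into rx_frag_size
         * chunks. The fragment that fills up the page is flagged last_frag and
         * records the page-level DMA address, so the whole page is unmapped
         * only when that fragment is consumed (see get_rx_page_info()).
         */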
2599         page_info = &rxo->page_info_tbl[rxq->head];
2600         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2601                 if (!pagep) {
2602                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2603                         if (unlikely(!pagep)) {
2604                                 rx_stats(rxo)->rx_post_fail++;
2605                                 break;
2606                         }
2607                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2608                                                     adapter->big_page_size,
2609                                                     DMA_FROM_DEVICE);
2610                         if (dma_mapping_error(dev, page_dmaaddr)) {
2611                                 put_page(pagep);
2612                                 pagep = NULL;
2613                                 adapter->drv_stats.dma_map_errors++;
2614                                 break;
2615                         }
2616                         page_offset = 0;
2617                 } else {
2618                         get_page(pagep);
2619                         page_offset += rx_frag_size;
2620                 }
2621                 page_info->page_offset = page_offset;
2622                 page_info->page = pagep;
2623
2624                 rxd = queue_head_node(rxq);
2625                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2626                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2627                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2628
2629                 /* Any space left in the current big page for another frag? */
2630                 if ((page_offset + rx_frag_size + rx_frag_size) >
2631                                         adapter->big_page_size) {
2632                         pagep = NULL;
2633                         page_info->last_frag = true;
2634                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2635                 } else {
2636                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2637                 }
2638
2639                 prev_page_info = page_info;
2640                 queue_head_inc(rxq);
2641                 page_info = &rxo->page_info_tbl[rxq->head];
2642         }
2643
2644         /* Mark the last frag of a page when we break out of the above loop
2645          * with no more slots available in the RXQ
2646          */
2647         if (pagep) {
2648                 prev_page_info->last_frag = true;
2649                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2650         }
2651
2652         if (posted) {
2653                 atomic_add(posted, &rxq->used);
2654                 if (rxo->rx_post_starved)
2655                         rxo->rx_post_starved = false;
2656                 do {
2657                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2658                         be_rxq_notify(adapter, rxq->id, notify);
2659                         posted -= notify;
2660                 } while (posted);
2661         } else if (atomic_read(&rxq->used) == 0) {
2662                 /* Let be_worker replenish when memory is available */
2663                 rxo->rx_post_starved = true;
2664         }
2665 }
2666
2667 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2668 {
2669         switch (status) {
2670         case BE_TX_COMP_HDR_PARSE_ERR:
2671                 tx_stats(txo)->tx_hdr_parse_err++;
2672                 break;
2673         case BE_TX_COMP_NDMA_ERR:
2674                 tx_stats(txo)->tx_dma_err++;
2675                 break;
2676         case BE_TX_COMP_ACL_ERR:
2677                 tx_stats(txo)->tx_spoof_check_err++;
2678                 break;
2679         }
2680 }
2681
2682 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2683 {
2684         switch (status) {
2685         case LANCER_TX_COMP_LSO_ERR:
2686                 tx_stats(txo)->tx_tso_err++;
2687                 break;
2688         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2689         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2690                 tx_stats(txo)->tx_spoof_check_err++;
2691                 break;
2692         case LANCER_TX_COMP_QINQ_ERR:
2693                 tx_stats(txo)->tx_qinq_err++;
2694                 break;
2695         case LANCER_TX_COMP_PARITY_ERR:
2696                 tx_stats(txo)->tx_internal_parity_err++;
2697                 break;
2698         case LANCER_TX_COMP_DMA_ERR:
2699                 tx_stats(txo)->tx_dma_err++;
2700                 break;
2701         case LANCER_TX_COMP_SGE_ERR:
2702                 tx_stats(txo)->tx_sge_err++;
2703                 break;
2704         }
2705 }
2706
2707 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2708                                                 struct be_tx_obj *txo)
2709 {
2710         struct be_queue_info *tx_cq = &txo->cq;
2711         struct be_tx_compl_info *txcp = &txo->txcp;
2712         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2713
2714         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2715                 return NULL;
2716
2717         /* Ensure load ordering of valid bit dword and other dwords below */
2718         rmb();
2719         be_dws_le_to_cpu(compl, sizeof(*compl));
2720
2721         txcp->status = GET_TX_COMPL_BITS(status, compl);
2722         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2723
2724         if (txcp->status) {
2725                 if (lancer_chip(adapter)) {
2726                         lancer_update_tx_err(txo, txcp->status);
2727                         /* Reset the adapter in case of TSO,
2728                          * SGE or parity errors
2729                          */
2730                         if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2731                             txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2732                             txcp->status == LANCER_TX_COMP_SGE_ERR)
2733                                 be_set_error(adapter, BE_ERROR_TX);
2734                 } else {
2735                         be_update_tx_err(txo, txcp->status);
2736                 }
2737         }
2738
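        /* Once a fatal TX error has been flagged, stop reaping completions;
         * outstanding WRBs are reclaimed later by be_tx_compl_clean().
         */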
2739         if (be_check_error(adapter, BE_ERROR_TX))
2740                 return NULL;
2741
2742         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2743         queue_tail_inc(tx_cq);
2744         return txcp;
2745 }
2746
2747 static u16 be_tx_compl_process(struct be_adapter *adapter,
2748                                struct be_tx_obj *txo, u16 last_index)
2749 {
2750         struct sk_buff **sent_skbs = txo->sent_skb_list;
2751         struct be_queue_info *txq = &txo->q;
2752         struct sk_buff *skb = NULL;
2753         bool unmap_skb_hdr = false;
2754         struct be_eth_wrb *wrb;
2755         u16 num_wrbs = 0;
2756         u32 frag_index;
2757
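        /* Walk the TXQ from its tail up to last_index: a slot that holds a
         * non-NULL sent_skb is the header WRB for that skb; unmap every
         * fragment WRB and free the previous skb once all of its WRBs have
         * been walked.
         */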
2758         do {
2759                 if (sent_skbs[txq->tail]) {
2760                         /* Free skb from prev req */
2761                         if (skb)
2762                                 dev_consume_skb_any(skb);
2763                         skb = sent_skbs[txq->tail];
2764                         sent_skbs[txq->tail] = NULL;
2765                         queue_tail_inc(txq);  /* skip hdr wrb */
2766                         num_wrbs++;
2767                         unmap_skb_hdr = true;
2768                 }
2769                 wrb = queue_tail_node(txq);
2770                 frag_index = txq->tail;
2771                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2772                               (unmap_skb_hdr && skb_headlen(skb)));
2773                 unmap_skb_hdr = false;
2774                 queue_tail_inc(txq);
2775                 num_wrbs++;
2776         } while (frag_index != last_index);
2777         dev_consume_skb_any(skb);
2778
2779         return num_wrbs;
2780 }
2781
2782 /* Return the number of events in the event queue */
2783 static inline int events_get(struct be_eq_obj *eqo)
2784 {
2785         struct be_eq_entry *eqe;
2786         int num = 0;
2787
2788         do {
2789                 eqe = queue_tail_node(&eqo->q);
2790                 if (eqe->evt == 0)
2791                         break;
2792
2793                 rmb();
2794                 eqe->evt = 0;
2795                 num++;
2796                 queue_tail_inc(&eqo->q);
2797         } while (true);
2798
2799         return num;
2800 }
2801
2802 /* Leaves the EQ in a disarmed state */
2803 static void be_eq_clean(struct be_eq_obj *eqo)
2804 {
2805         int num = events_get(eqo);
2806
2807         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2808 }
2809
2810 /* Free posted rx buffers that were not used */
2811 static void be_rxq_clean(struct be_rx_obj *rxo)
2812 {
2813         struct be_queue_info *rxq = &rxo->q;
2814         struct be_rx_page_info *page_info;
2815
2816         while (atomic_read(&rxq->used) > 0) {
2817                 page_info = get_rx_page_info(rxo);
2818                 put_page(page_info->page);
2819                 memset(page_info, 0, sizeof(*page_info));
2820         }
2821         BUG_ON(atomic_read(&rxq->used));
2822         rxq->tail = 0;
2823         rxq->head = 0;
2824 }
2825
2826 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2827 {
2828         struct be_queue_info *rx_cq = &rxo->cq;
2829         struct be_rx_compl_info *rxcp;
2830         struct be_adapter *adapter = rxo->adapter;
2831         int flush_wait = 0;
2832
2833         /* Consume pending rx completions.
2834          * Wait for the flush completion (identified by zero num_rcvd)
2835          * to arrive. Notify CQ even when there are no more CQ entries
2836          * for HW to flush partially coalesced CQ entries.
2837          * In Lancer, there is no need to wait for flush compl.
2838          */
2839         for (;;) {
2840                 rxcp = be_rx_compl_get(rxo);
2841                 if (!rxcp) {
2842                         if (lancer_chip(adapter))
2843                                 break;
2844
2845                         if (flush_wait++ > 50 ||
2846                             be_check_error(adapter,
2847                                            BE_ERROR_HW)) {
2848                                 dev_warn(&adapter->pdev->dev,
2849                                          "did not receive flush compl\n");
2850                                 break;
2851                         }
2852                         be_cq_notify(adapter, rx_cq->id, true, 0);
2853                         mdelay(1);
2854                 } else {
2855                         be_rx_compl_discard(rxo, rxcp);
2856                         be_cq_notify(adapter, rx_cq->id, false, 1);
2857                         if (rxcp->num_rcvd == 0)
2858                                 break;
2859                 }
2860         }
2861
2862         /* After cleanup, leave the CQ in unarmed state */
2863         be_cq_notify(adapter, rx_cq->id, false, 0);
2864 }
2865
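     /* Drain outstanding TX completions, polling until the HW has been silent
      * for ~10ms, then free any wrbs that were queued but never notified to
      * the HW and reset the TXQ indices accordingly.
      */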
2866 static void be_tx_compl_clean(struct be_adapter *adapter)
2867 {
2868         struct device *dev = &adapter->pdev->dev;
2869         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2870         struct be_tx_compl_info *txcp;
2871         struct be_queue_info *txq;
2872         u32 end_idx, notified_idx;
2873         struct be_tx_obj *txo;
2874         int i, pending_txqs;
2875
2876         /* Stop polling for compls when HW has been silent for 10ms */
2877         do {
2878                 pending_txqs = adapter->num_tx_qs;
2879
2880                 for_all_tx_queues(adapter, txo, i) {
2881                         cmpl = 0;
2882                         num_wrbs = 0;
2883                         txq = &txo->q;
2884                         while ((txcp = be_tx_compl_get(adapter, txo))) {
2885                                 num_wrbs +=
2886                                         be_tx_compl_process(adapter, txo,
2887                                                             txcp->end_index);
2888                                 cmpl++;
2889                         }
2890                         if (cmpl) {
2891                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2892                                 atomic_sub(num_wrbs, &txq->used);
2893                                 timeo = 0;
2894                         }
2895                         if (!be_is_tx_compl_pending(txo))
2896                                 pending_txqs--;
2897                 }
2898
2899                 if (pending_txqs == 0 || ++timeo > 10 ||
2900                     be_check_error(adapter, BE_ERROR_HW))
2901                         break;
2902
2903                 mdelay(1);
2904         } while (true);
2905
2906         /* Free enqueued TX that was never notified to HW */
2907         for_all_tx_queues(adapter, txo, i) {
2908                 txq = &txo->q;
2909
2910                 if (atomic_read(&txq->used)) {
2911                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2912                                  i, atomic_read(&txq->used));
2913                         notified_idx = txq->tail;
2914                         end_idx = txq->tail;
2915                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2916                                   txq->len);
2917                         /* Use the tx-compl process logic to handle requests
2918                          * that were not sent to the HW.
2919                          */
2920                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2921                         atomic_sub(num_wrbs, &txq->used);
2922                         BUG_ON(atomic_read(&txq->used));
2923                         txo->pend_wrb_cnt = 0;
2924                         /* Since hw was never notified of these requests,
2925                          * reset TXQ indices
2926                          */
2927                         txq->head = notified_idx;
2928                         txq->tail = notified_idx;
2929                 }
2930         }
2931 }
2932
2933 static void be_evt_queues_destroy(struct be_adapter *adapter)
2934 {
2935         struct be_eq_obj *eqo;
2936         int i;
2937
2938         for_all_evt_queues(adapter, eqo, i) {
2939                 if (eqo->q.created) {
2940                         be_eq_clean(eqo);
2941                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2942                         netif_napi_del(&eqo->napi);
2943                         free_cpumask_var(eqo->affinity_mask);
2944                 }
2945                 be_queue_free(adapter, &eqo->q);
2946         }
2947 }
2948
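     /* Create enough EQs to service both the RX and TX queues; each EQ gets a
      * NAPI context and a CPU affinity hint spread across the local NUMA node.
      */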
2949 static int be_evt_queues_create(struct be_adapter *adapter)
2950 {
2951         struct be_queue_info *eq;
2952         struct be_eq_obj *eqo;
2953         struct be_aic_obj *aic;
2954         int i, rc;
2955
2956         /* need enough EQs to service both RX and TX queues */
2957         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2958                                     max(adapter->cfg_num_rx_irqs,
2959                                         adapter->cfg_num_tx_irqs));
2960
2961         adapter->aic_enabled = true;
2962
2963         for_all_evt_queues(adapter, eqo, i) {
2964                 int numa_node = dev_to_node(&adapter->pdev->dev);
2965
2966                 aic = &adapter->aic_obj[i];
2967                 eqo->adapter = adapter;
2968                 eqo->idx = i;
2969                 aic->max_eqd = BE_MAX_EQD;
2970
2971                 eq = &eqo->q;
2972                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2973                                     sizeof(struct be_eq_entry));
2974                 if (rc)
2975                         return rc;
2976
2977                 rc = be_cmd_eq_create(adapter, eqo);
2978                 if (rc)
2979                         return rc;
2980
2981                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2982                         return -ENOMEM;
2983                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2984                                 eqo->affinity_mask);
2985                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll);
2986         }
2987         return 0;
2988 }
2989
2990 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2991 {
2992         struct be_queue_info *q;
2993
2994         q = &adapter->mcc_obj.q;
2995         if (q->created)
2996                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2997         be_queue_free(adapter, q);
2998
2999         q = &adapter->mcc_obj.cq;
3000         if (q->created)
3001                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3002         be_queue_free(adapter, q);
3003 }
3004
3005 /* Must be called only after TX qs are created as MCC shares TX EQ */
3006 static int be_mcc_queues_create(struct be_adapter *adapter)
3007 {
3008         struct be_queue_info *q, *cq;
3009
3010         cq = &adapter->mcc_obj.cq;
3011         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3012                            sizeof(struct be_mcc_compl)))
3013                 goto err;
3014
3015         /* Use the default EQ for MCC completions */
3016         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3017                 goto mcc_cq_free;
3018
3019         q = &adapter->mcc_obj.q;
3020         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3021                 goto mcc_cq_destroy;
3022
3023         if (be_cmd_mccq_create(adapter, q, cq))
3024                 goto mcc_q_free;
3025
3026         return 0;
3027
3028 mcc_q_free:
3029         be_queue_free(adapter, q);
3030 mcc_cq_destroy:
3031         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3032 mcc_cq_free:
3033         be_queue_free(adapter, cq);
3034 err:
3035         return -1;
3036 }
3037
3038 static void be_tx_queues_destroy(struct be_adapter *adapter)
3039 {
3040         struct be_queue_info *q;
3041         struct be_tx_obj *txo;
3042         u8 i;
3043
3044         for_all_tx_queues(adapter, txo, i) {
3045                 q = &txo->q;
3046                 if (q->created)
3047                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3048                 be_queue_free(adapter, q);
3049
3050                 q = &txo->cq;
3051                 if (q->created)
3052                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3053                 be_queue_free(adapter, q);
3054         }
3055 }
3056
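     /* Create the TX CQ/queue pairs. TX CQs are distributed across the
      * available EQs (an EQ may be shared by more than one TXQ) and XPS is
      * derived from the EQ's affinity mask.
      */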
3057 static int be_tx_qs_create(struct be_adapter *adapter)
3058 {
3059         struct be_queue_info *cq;
3060         struct be_tx_obj *txo;
3061         struct be_eq_obj *eqo;
3062         int status, i;
3063
3064         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3065
3066         for_all_tx_queues(adapter, txo, i) {
3067                 cq = &txo->cq;
3068                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3069                                         sizeof(struct be_eth_tx_compl));
3070                 if (status)
3071                         return status;
3072
3073                 u64_stats_init(&txo->stats.sync);
3074                 u64_stats_init(&txo->stats.sync_compl);
3075
3076                 /* If num_evt_qs is less than num_tx_qs, then more than
3077                  * one txq shares an eq
3078                  */
3079                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3080                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3081                 if (status)
3082                         return status;
3083
3084                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3085                                         sizeof(struct be_eth_wrb));
3086                 if (status)
3087                         return status;
3088
3089                 status = be_cmd_txq_create(adapter, txo);
3090                 if (status)
3091                         return status;
3092
3093                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3094                                     eqo->idx);
3095         }
3096
3097         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3098                  adapter->num_tx_qs);
3099         return 0;
3100 }
3101
3102 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3103 {
3104         struct be_queue_info *q;
3105         struct be_rx_obj *rxo;
3106         int i;
3107
3108         for_all_rx_queues(adapter, rxo, i) {
3109                 q = &rxo->cq;
3110                 if (q->created)
3111                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3112                 be_queue_free(adapter, q);
3113         }
3114 }
3115
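     /* Create one CQ per RX queue: the RSS rings (used only when at least two
      * are possible) plus an optional default RXQ.
      */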
3116 static int be_rx_cqs_create(struct be_adapter *adapter)
3117 {
3118         struct be_queue_info *eq, *cq;
3119         struct be_rx_obj *rxo;
3120         int rc, i;
3121
3122         adapter->num_rss_qs =
3123                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3124
3125         /* We'll use RSS only if at least 2 RSS rings are supported. */
3126         if (adapter->num_rss_qs < 2)
3127                 adapter->num_rss_qs = 0;
3128
3129         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3130
3131         /* When the interface is not capable of RSS rings (and there is no
3132          * need to create a default RXQ) we'll still need one RXQ
3133          */
3134         if (adapter->num_rx_qs == 0)
3135                 adapter->num_rx_qs = 1;
3136
3137         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3138         for_all_rx_queues(adapter, rxo, i) {
3139                 rxo->adapter = adapter;
3140                 cq = &rxo->cq;
3141                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3142                                     sizeof(struct be_eth_rx_compl));
3143                 if (rc)
3144                         return rc;
3145
3146                 u64_stats_init(&rxo->stats.sync);
3147                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3148                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3149                 if (rc)
3150                         return rc;
3151         }
3152
3153         dev_info(&adapter->pdev->dev,
3154                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3155         return 0;
3156 }
3157
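     /* INTx handler: count the pending events and schedule NAPI; the EQ is
      * notified without being re-armed as be_poll() re-arms it when done.
      */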
3158 static irqreturn_t be_intx(int irq, void *dev)
3159 {
3160         struct be_eq_obj *eqo = dev;
3161         struct be_adapter *adapter = eqo->adapter;
3162         int num_evts = 0;
3163
3164         /* IRQ is not expected when NAPI is scheduled as the EQ
3165          * will not be armed.
3166          * But, this can happen on Lancer INTx where it takes
3167          * a while to de-assert INTx or in BE2 where occasionally
3168          * an interrupt may be raised even when EQ is unarmed.
3169          * If NAPI is already scheduled, then counting & notifying
3170          * events will orphan them.
3171          */
3172         if (napi_schedule_prep(&eqo->napi)) {
3173                 num_evts = events_get(eqo);
3174                 __napi_schedule(&eqo->napi);
3175                 if (num_evts)
3176                         eqo->spurious_intr = 0;
3177         }
3178         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3179
3180         /* Return IRQ_HANDLED only for the first spurious intr
3181          * after a valid intr to stop the kernel from branding
3182          * this irq as a bad one!
3183          */
3184         if (num_evts || eqo->spurious_intr++ == 0)
3185                 return IRQ_HANDLED;
3186         else
3187                 return IRQ_NONE;
3188 }
3189
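     /* MSI-X handler: clear the interrupt (leaving the EQ unarmed) and let
      * NAPI process the events.
      */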
3190 static irqreturn_t be_msix(int irq, void *dev)
3191 {
3192         struct be_eq_obj *eqo = dev;
3193
3194         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3195         napi_schedule(&eqo->napi);
3196         return IRQ_HANDLED;
3197 }
3198
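     /* Use GRO only for error-free TCP pkts whose L4 csum was verified by HW */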
3199 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3200 {
3201         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3202 }
3203
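     /* Process up to @budget RX completions from this RX-obj's CQ, handing
      * pkts to GRO or the regular receive path, and replenish the RXQ when it
      * is running low on posted buffers.
      */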
3204 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3205                          int budget)
3206 {
3207         struct be_adapter *adapter = rxo->adapter;
3208         struct be_queue_info *rx_cq = &rxo->cq;
3209         struct be_rx_compl_info *rxcp;
3210         u32 work_done;
3211         u32 frags_consumed = 0;
3212
3213         for (work_done = 0; work_done < budget; work_done++) {
3214                 rxcp = be_rx_compl_get(rxo);
3215                 if (!rxcp)
3216                         break;
3217
3218                 /* Is it a flush compl that has no data */
3219                 if (unlikely(rxcp->num_rcvd == 0))
3220                         goto loop_continue;
3221
3222                 /* Discard a compl with partial DMA (can happen on Lancer B0) */
3223                 if (unlikely(!rxcp->pkt_size)) {
3224                         be_rx_compl_discard(rxo, rxcp);
3225                         goto loop_continue;
3226                 }
3227
3228                 /* On BE drop pkts that arrive due to imperfect filtering in
3229                  * promiscuous mode on some SKUs
3230                  */
3231                 if (unlikely(rxcp->port != adapter->port_num &&
3232                              !lancer_chip(adapter))) {
3233                         be_rx_compl_discard(rxo, rxcp);
3234                         goto loop_continue;
3235                 }
3236
3237                 if (do_gro(rxcp))
3238                         be_rx_compl_process_gro(rxo, napi, rxcp);
3239                 else
3240                         be_rx_compl_process(rxo, napi, rxcp);
3241
3242 loop_continue:
3243                 frags_consumed += rxcp->num_rcvd;
3244                 be_rx_stats_update(rxo, rxcp);
3245         }
3246
3247         if (work_done) {
3248                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3249
3250                 /* When an rx-obj gets into post_starved state, just
3251                  * let be_worker do the posting.
3252                  */
3253                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3254                     !rxo->rx_post_starved)
3255                         be_post_rx_frags(rxo, GFP_ATOMIC,
3256                                          max_t(u32, MAX_RX_POST,
3257                                                frags_consumed));
3258         }
3259
3260         return work_done;
3261 }
3262
3263
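     /* Reap TX completions for this TX-obj: free the completed wrbs and wake
      * the netdev sub-queue if it was stopped for lack of wrbs.
      */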
3264 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3265                           int idx)
3266 {
3267         int num_wrbs = 0, work_done = 0;
3268         struct be_tx_compl_info *txcp;
3269
3270         while ((txcp = be_tx_compl_get(adapter, txo))) {
3271                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3272                 work_done++;
3273         }
3274
3275         if (work_done) {
3276                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3277                 atomic_sub(num_wrbs, &txo->q.used);
3278
3279                 /* As Tx wrbs have been freed up, wake up netdev queue
3280                  * if it was stopped due to lack of tx wrbs.  */
3281                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3282                     be_can_txq_wake(txo)) {
3283                         netif_wake_subqueue(adapter->netdev, idx);
3284                 }
3285
3286                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3287                 tx_stats(txo)->tx_compl += work_done;
3288                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3289         }
3290 }
3291
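     /* NAPI poll handler: services the TX, RX and (on the MCC EQ) MCC
      * completions of all queues attached to this EQ; the EQ is re-armed only
      * when less than the full budget was consumed.
      */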
3292 int be_poll(struct napi_struct *napi, int budget)
3293 {
3294         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3295         struct be_adapter *adapter = eqo->adapter;
3296         int max_work = 0, work, i, num_evts;
3297         struct be_rx_obj *rxo;
3298         struct be_tx_obj *txo;
3299         u32 mult_enc = 0;
3300
3301         num_evts = events_get(eqo);
3302
3303         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3304                 be_process_tx(adapter, txo, i);
3305
3306         /* This loop will iterate twice for EQ0 in which
3307          * completions of the last RXQ (the default one) are also processed.
3308          * For other EQs the loop iterates only once
3309          */
3310         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3311                 work = be_process_rx(rxo, napi, budget);
3312                 max_work = max(work, max_work);
3313         }
3314
3315         if (is_mcc_eqo(eqo))
3316                 be_process_mcc(adapter);
3317
3318         if (max_work < budget) {
3319                 napi_complete_done(napi, max_work);
3320
3321                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3322                  * delay via a delay multiplier encoding value
3323                  */
3324                 if (skyhawk_chip(adapter))
3325                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3326
3327                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3328                              mult_enc);
3329         } else {
3330                 /* As we'll continue in polling mode, count and clear events */
3331                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3332         }
3333         return max_work;
3334 }
3335
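     /* Check the SLIPORT status registers (Lancer) or the UE status registers
      * (BE2/BE3/Skyhawk) and latch BE_ERROR_UE if the adapter reports an
      * unrecoverable error.
      */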
3336 void be_detect_error(struct be_adapter *adapter)
3337 {
3338         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3339         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3340         struct device *dev = &adapter->pdev->dev;
3341         u16 val;
3342         u32 i;
3343
3344         if (be_check_error(adapter, BE_ERROR_HW))
3345                 return;
3346
3347         if (lancer_chip(adapter)) {
3348                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3349                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3350                         be_set_error(adapter, BE_ERROR_UE);
3351                         sliport_err1 = ioread32(adapter->db +
3352                                                 SLIPORT_ERROR1_OFFSET);
3353                         sliport_err2 = ioread32(adapter->db +
3354                                                 SLIPORT_ERROR2_OFFSET);
3355                         /* Do not log error messages if it's a FW reset */
3356                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3357                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3358                                 dev_info(dev, "Reset is in progress\n");
3359                         } else {
3360                                 dev_err(dev, "Error detected in the card\n");
3361                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3362                                         sliport_status);
3363                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3364                                         sliport_err1);
3365                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3366                                         sliport_err2);
3367                         }
3368                 }
3369         } else {
3370                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3371                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3372                 ue_lo_mask = ioread32(adapter->pcicfg +
3373                                       PCICFG_UE_STATUS_LOW_MASK);
3374                 ue_hi_mask = ioread32(adapter->pcicfg +
3375                                       PCICFG_UE_STATUS_HI_MASK);
3376
3377                 ue_lo = (ue_lo & ~ue_lo_mask);
3378                 ue_hi = (ue_hi & ~ue_hi_mask);
3379
3380                 if (ue_lo || ue_hi) {
3381                         /* On certain platforms BE3 hardware can indicate
3382                          * spurious UEs. In case of a UE in the chip,
3383                          * the POST register correctly reports either a
3384                          * FAT_LOG_START state (FW is currently dumping
3385                          * FAT log data) or an ARMFW_UE state. Check for the
3386                          * above states to ascertain if the UE is valid or not.
3387                          */
3388                         if (BE3_chip(adapter)) {
3389                                 val = be_POST_stage_get(adapter);
3390                                 if ((val & POST_STAGE_FAT_LOG_START)
3391                                      != POST_STAGE_FAT_LOG_START &&
3392                                     (val & POST_STAGE_ARMFW_UE)
3393                                      != POST_STAGE_ARMFW_UE &&
3394                                     (val & POST_STAGE_RECOVERABLE_ERR)
3395                                      != POST_STAGE_RECOVERABLE_ERR)
3396                                         return;
3397                         }
3398
3399                         dev_err(dev, "Error detected in the adapter");
3400                         be_set_error(adapter, BE_ERROR_UE);
3401
3402                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3403                                 if (ue_lo & 1)
3404                                         dev_err(dev, "UE: %s bit set\n",
3405                                                 ue_status_low_desc[i]);
3406                         }
3407                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3408                                 if (ue_hi & 1)
3409                                         dev_err(dev, "UE: %s bit set\n",
3410                                                 ue_status_hi_desc[i]);
3411                         }
3412                 }
3413         }
3414 }
3415
3416 static void be_msix_disable(struct be_adapter *adapter)
3417 {
3418         if (msix_enabled(adapter)) {
3419                 pci_disable_msix(adapter->pdev);
3420                 adapter->num_msix_vec = 0;
3421                 adapter->num_msix_roce_vec = 0;
3422         }
3423 }
3424
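     /* Enable MSI-X with enough vectors for the NIC (and RoCE when supported).
      * On failure VFs return the error (INTx is not supported there) while PFs
      * fall back to INTx.
      */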
3425 static int be_msix_enable(struct be_adapter *adapter)
3426 {
3427         unsigned int i, max_roce_eqs;
3428         struct device *dev = &adapter->pdev->dev;
3429         int num_vec;
3430
3431         /* If RoCE is supported, program the max number of vectors that
3432          * could be used for NIC and RoCE; otherwise, just program the number
3433          * we'll use initially.
3434          */
3435         if (be_roce_supported(adapter)) {
3436                 max_roce_eqs =
3437                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3438                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3439                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3440         } else {
3441                 num_vec = max(adapter->cfg_num_rx_irqs,
3442                               adapter->cfg_num_tx_irqs);
3443         }
3444
3445         for (i = 0; i < num_vec; i++)
3446                 adapter->msix_entries[i].entry = i;
3447
3448         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3449                                         MIN_MSIX_VECTORS, num_vec);
3450         if (num_vec < 0)
3451                 goto fail;
3452
3453         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3454                 adapter->num_msix_roce_vec = num_vec / 2;
3455                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3456                          adapter->num_msix_roce_vec);
3457         }
3458
3459         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3460
3461         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3462                  adapter->num_msix_vec);
3463         return 0;
3464
3465 fail:
3466         dev_warn(dev, "MSIx enable failed\n");
3467
3468         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3469         if (be_virtfn(adapter))
3470                 return num_vec;
3471         return 0;
3472 }
3473
3474 static inline int be_msix_vec_get(struct be_adapter *adapter,
3475                                   struct be_eq_obj *eqo)
3476 {
3477         return adapter->msix_entries[eqo->msix_idx].vector;
3478 }
3479
3480 static int be_msix_register(struct be_adapter *adapter)
3481 {
3482         struct net_device *netdev = adapter->netdev;
3483         struct be_eq_obj *eqo;
3484         int status, i, vec;
3485
3486         for_all_evt_queues(adapter, eqo, i) {
3487                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3488                 vec = be_msix_vec_get(adapter, eqo);
3489                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3490                 if (status)
3491                         goto err_msix;
3492
3493                 irq_update_affinity_hint(vec, eqo->affinity_mask);
3494         }
3495
3496         return 0;
3497 err_msix:
3498         for (i--; i >= 0; i--) {
3499                 eqo = &adapter->eq_obj[i];
3500                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3501         }
3502         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3503                  status);
3504         be_msix_disable(adapter);
3505         return status;
3506 }
3507
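     /* Register a per-EQ MSI-X handler; if MSI-X is unavailable (or fails to
      * register on a PF), fall back to a shared INTx handler on EQ0.
      */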
3508 static int be_irq_register(struct be_adapter *adapter)
3509 {
3510         struct net_device *netdev = adapter->netdev;
3511         int status;
3512
3513         if (msix_enabled(adapter)) {
3514                 status = be_msix_register(adapter);
3515                 if (status == 0)
3516                         goto done;
3517                 /* INTx is not supported for VF */
3518                 if (be_virtfn(adapter))
3519                         return status;
3520         }
3521
3522         /* INTx: only the first EQ is used */
3523         netdev->irq = adapter->pdev->irq;
3524         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3525                              &adapter->eq_obj[0]);
3526         if (status) {
3527                 dev_err(&adapter->pdev->dev,
3528                         "INTx request IRQ failed - err %d\n", status);
3529                 return status;
3530         }
3531 done:
3532         adapter->isr_registered = true;
3533         return 0;
3534 }
3535
3536 static void be_irq_unregister(struct be_adapter *adapter)
3537 {
3538         struct net_device *netdev = adapter->netdev;
3539         struct be_eq_obj *eqo;
3540         int i, vec;
3541
3542         if (!adapter->isr_registered)
3543                 return;
3544
3545         /* INTx */
3546         if (!msix_enabled(adapter)) {
3547                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3548                 goto done;
3549         }
3550
3551         /* MSIx */
3552         for_all_evt_queues(adapter, eqo, i) {
3553                 vec = be_msix_vec_get(adapter, eqo);
3554                 irq_update_affinity_hint(vec, NULL);
3555                 free_irq(vec, eqo);
3556         }
3557
3558 done:
3559         adapter->isr_registered = false;
3560 }
3561
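     /* Destroy the RXQs after draining their CQs and freeing the posted
      * buffers; RSS is turned off if it was enabled.
      */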
3562 static void be_rx_qs_destroy(struct be_adapter *adapter)
3563 {
3564         struct rss_info *rss = &adapter->rss_info;
3565         struct be_queue_info *q;
3566         struct be_rx_obj *rxo;
3567         int i;
3568
3569         for_all_rx_queues(adapter, rxo, i) {
3570                 q = &rxo->q;
3571                 if (q->created) {
3572                         /* If RXQs are destroyed while in an "out of buffer"
3573                          * state, there is a possibility of an HW stall on
3574                          * Lancer. So, post 64 buffers to each queue to relieve
3575                          * the "out of buffer" condition.
3576                          * Make sure there's space in the RXQ before posting.
3577                          */
3578                         if (lancer_chip(adapter)) {
3579                                 be_rx_cq_clean(rxo);
3580                                 if (atomic_read(&q->used) == 0)
3581                                         be_post_rx_frags(rxo, GFP_KERNEL,
3582                                                          MAX_RX_POST);
3583                         }
3584
3585                         be_cmd_rxq_destroy(adapter, q);
3586                         be_rx_cq_clean(rxo);
3587                         be_rxq_clean(rxo);
3588                 }
3589                 be_queue_free(adapter, q);
3590         }
3591
3592         if (rss->rss_flags) {
3593                 rss->rss_flags = RSS_ENABLE_NONE;
3594                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3595                                   128, rss->rss_hkey);
3596         }
3597 }
3598
3599 static void be_disable_if_filters(struct be_adapter *adapter)
3600 {
3601         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3602         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3603             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3604                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3605                 eth_zero_addr(adapter->dev_mac);
3606         }
3607
3608         be_clear_uc_list(adapter);
3609         be_clear_mc_list(adapter);
3610
3611         /* The IFACE flags are enabled in the open path and cleared
3612          * in the close path. When a VF gets detached from the host and
3613          * assigned to a VM the following happens:
3614          *      - VF's IFACE flags get cleared in the detach path
3615          *      - IFACE create is issued by the VF in the attach path
3616          * Due to a bug in the BE3/Skyhawk-R FW
3617          * (Lancer FW doesn't have the bug), the IFACE capability flags
3618          * specified along with the IFACE create cmd issued by a VF are not
3619          * honoured by FW.  As a consequence, if a *new* driver
3620          * (that enables/disables IFACE flags in open/close)
3621          * is loaded in the host and an *old* driver is used by a VM/VF,
3622          * the IFACE gets created *without* the needed flags.
3623          * To avoid this, disable RX-filter flags only for Lancer.
3624          */
3625         if (lancer_chip(adapter)) {
3626                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3627                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3628         }
3629 }
3630
3631 static int be_close(struct net_device *netdev)
3632 {
3633         struct be_adapter *adapter = netdev_priv(netdev);
3634         struct be_eq_obj *eqo;
3635         int i;
3636
3637         /* This protection is needed as be_close() may be called even when the
3638          * adapter is in cleared state (after eeh perm failure)
3639          */
3640         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3641                 return 0;
3642
3643         /* Before attempting cleanup ensure all the pending cmds in the
3644          * be_wq have finished execution
3645          */
3646         flush_workqueue(be_wq);
3647
3648         be_disable_if_filters(adapter);
3649
3650         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3651                 for_all_evt_queues(adapter, eqo, i) {
3652                         napi_disable(&eqo->napi);
3653                 }
3654                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3655         }
3656
3657         be_async_mcc_disable(adapter);
3658
3659         /* Wait for all pending tx completions to arrive so that
3660          * all tx skbs are freed.
3661          */
3662         netif_tx_disable(netdev);
3663         be_tx_compl_clean(adapter);
3664
3665         be_rx_qs_destroy(adapter);
3666
3667         for_all_evt_queues(adapter, eqo, i) {
3668                 if (msix_enabled(adapter))
3669                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3670                 else
3671                         synchronize_irq(netdev->irq);
3672                 be_eq_clean(eqo);
3673         }
3674
3675         be_irq_unregister(adapter);
3676
3677         return 0;
3678 }
3679
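     /* Create the RXQ rings (default RXQ and/or RSS rings), program the RSS
      * indirection table and hash key when multiple RX queues exist, and post
      * the initial receive buffers.
      */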
3680 static int be_rx_qs_create(struct be_adapter *adapter)
3681 {
3682         struct rss_info *rss = &adapter->rss_info;
3683         u8 rss_key[RSS_HASH_KEY_LEN];
3684         struct be_rx_obj *rxo;
3685         int rc, i, j;
3686
3687         for_all_rx_queues(adapter, rxo, i) {
3688                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3689                                     sizeof(struct be_eth_rx_d));
3690                 if (rc)
3691                         return rc;
3692         }
3693
3694         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3695                 rxo = default_rxo(adapter);
3696                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3697                                        rx_frag_size, adapter->if_handle,
3698                                        false, &rxo->rss_id);
3699                 if (rc)
3700                         return rc;
3701         }
3702
3703         for_all_rss_queues(adapter, rxo, i) {
3704                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3705                                        rx_frag_size, adapter->if_handle,
3706                                        true, &rxo->rss_id);
3707                 if (rc)
3708                         return rc;
3709         }
3710
3711         if (be_multi_rxq(adapter)) {
3712                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3713                         for_all_rss_queues(adapter, rxo, i) {
3714                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3715                                         break;
3716                                 rss->rsstable[j + i] = rxo->rss_id;
3717                                 rss->rss_queue[j + i] = i;
3718                         }
3719                 }
3720                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3721                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3722
3723                 if (!BEx_chip(adapter))
3724                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3725                                 RSS_ENABLE_UDP_IPV6;
3726
3727                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3728                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3729                                        RSS_INDIR_TABLE_LEN, rss_key);
3730                 if (rc) {
3731                         rss->rss_flags = RSS_ENABLE_NONE;
3732                         return rc;
3733                 }
3734
3735                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3736         } else {
3737                 /* Disable RSS, if only default RX Q is created */
3738                 rss->rss_flags = RSS_ENABLE_NONE;
3739         }
3740
3741
3742         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3743          * which is a queue empty condition
3744          */
3745         for_all_rx_queues(adapter, rxo, i)
3746                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3747
3748         return 0;
3749 }
3750
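     /* Called in the open path: re-enable the basic RX-filter flags and
      * re-program the interface MAC, VLAN filters and RX mode.
      */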
3751 static int be_enable_if_filters(struct be_adapter *adapter)
3752 {
3753         int status;
3754
3755         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3756         if (status)
3757                 return status;
3758
3759         /* Normally this condition is true as the ->dev_mac is zeroed.
3760          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3761          * subsequent be_dev_mac_add() can fail (after fresh boot)
3762          */
3763         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3764                 int old_pmac_id = -1;
3765
3766                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3767                 if (!is_zero_ether_addr(adapter->dev_mac))
3768                         old_pmac_id = adapter->pmac_id[0];
3769
3770                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3771                 if (status)
3772                         return status;
3773
3774                 /* Delete the old programmed MAC as we successfully programmed
3775                  * a new MAC
3776                  */
3777                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3778                         be_dev_mac_del(adapter, old_pmac_id);
3779
3780                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3781         }
3782
3783         if (adapter->vlans_added)
3784                 be_vid_config(adapter);
3785
3786         __be_set_rx_mode(adapter);
3787
3788         return 0;
3789 }
3790
3791 static int be_open(struct net_device *netdev)
3792 {
3793         struct be_adapter *adapter = netdev_priv(netdev);
3794         struct be_eq_obj *eqo;
3795         struct be_rx_obj *rxo;
3796         struct be_tx_obj *txo;
3797         u8 link_status;
3798         int status, i;
3799
3800         status = be_rx_qs_create(adapter);
3801         if (status)
3802                 goto err;
3803
3804         status = be_enable_if_filters(adapter);
3805         if (status)
3806                 goto err;
3807
3808         status = be_irq_register(adapter);
3809         if (status)
3810                 goto err;
3811
3812         for_all_rx_queues(adapter, rxo, i)
3813                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3814
3815         for_all_tx_queues(adapter, txo, i)
3816                 be_cq_notify(adapter, txo->cq.id, true, 0);
3817
3818         be_async_mcc_enable(adapter);
3819
3820         for_all_evt_queues(adapter, eqo, i) {
3821                 napi_enable(&eqo->napi);
3822                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3823         }
3824         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3825
3826         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3827         if (!status)
3828                 be_link_status_update(adapter, link_status);
3829
3830         netif_tx_start_all_queues(netdev);
3831
3832         udp_tunnel_nic_reset_ntf(netdev);
3833
3834         return 0;
3835 err:
3836         be_close(adapter->netdev);
3837         return -EIO;
3838 }
3839
3840 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3841 {
3842         u32 addr;
3843
3844         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3845
3846         mac[5] = (u8)(addr & 0xFF);
3847         mac[4] = (u8)((addr >> 8) & 0xFF);
3848         mac[3] = (u8)((addr >> 16) & 0xFF);
3849         /* Use the OUI from the current MAC address */
3850         memcpy(mac, adapter->netdev->dev_addr, 3);
3851 }
3852
3853 /*
3854  * Generate a seed MAC address from the PF MAC Address using jhash.
3855  * MAC addresses for VFs are assigned incrementally starting from the seed.
3856  * These addresses are programmed in the ASIC by the PF and the VF driver
3857  * queries for the MAC address during its probe.
3858  */
3859 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3860 {
3861         u32 vf;
3862         int status = 0;
3863         u8 mac[ETH_ALEN];
3864         struct be_vf_cfg *vf_cfg;
3865
3866         be_vf_eth_addr_generate(adapter, mac);
3867
3868         for_all_vfs(adapter, vf_cfg, vf) {
3869                 if (BEx_chip(adapter))
3870                         status = be_cmd_pmac_add(adapter, mac,
3871                                                  vf_cfg->if_handle,
3872                                                  &vf_cfg->pmac_id, vf + 1);
3873                 else
3874                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3875                                                 vf + 1);
3876
3877                 if (status)
3878                         dev_err(&adapter->pdev->dev,
3879                                 "Mac address assignment failed for VF %d\n",
3880                                 vf);
3881                 else
3882                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3883
3884                 mac[5] += 1;
3885         }
3886         return status;
3887 }
3888
3889 static int be_vfs_mac_query(struct be_adapter *adapter)
3890 {
3891         int status, vf;
3892         u8 mac[ETH_ALEN];
3893         struct be_vf_cfg *vf_cfg;
3894
3895         for_all_vfs(adapter, vf_cfg, vf) {
3896                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3897                                                mac, vf_cfg->if_handle,
3898                                                false, vf+1);
3899                 if (status)
3900                         return status;
3901                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3902         }
3903         return 0;
3904 }
3905
3906 static void be_vf_clear(struct be_adapter *adapter)
3907 {
3908         struct be_vf_cfg *vf_cfg;
3909         u32 vf;
3910
3911         if (pci_vfs_assigned(adapter->pdev)) {
3912                 dev_warn(&adapter->pdev->dev,
3913                          "VFs are assigned to VMs: not disabling VFs\n");
3914                 goto done;
3915         }
3916
3917         pci_disable_sriov(adapter->pdev);
3918
3919         for_all_vfs(adapter, vf_cfg, vf) {
3920                 if (BEx_chip(adapter))
3921                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3922                                         vf_cfg->pmac_id, vf + 1);
3923                 else
3924                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3925                                        vf + 1);
3926
3927                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3928         }
3929
3930         if (BE3_chip(adapter))
3931                 be_cmd_set_hsw_config(adapter, 0, 0,
3932                                       adapter->if_handle,
3933                                       PORT_FWD_TYPE_PASSTHRU, 0);
3934 done:
3935         kfree(adapter->vf_cfg);
3936         adapter->num_vfs = 0;
3937         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3938 }
3939
3940 static void be_clear_queues(struct be_adapter *adapter)
3941 {
3942         be_mcc_queues_destroy(adapter);
3943         be_rx_cqs_destroy(adapter);
3944         be_tx_queues_destroy(adapter);
3945         be_evt_queues_destroy(adapter);
3946 }
3947
3948 static void be_cancel_worker(struct be_adapter *adapter)
3949 {
3950         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3951                 cancel_delayed_work_sync(&adapter->work);
3952                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3953         }
3954 }
3955
3956 static void be_cancel_err_detection(struct be_adapter *adapter)
3957 {
3958         struct be_error_recovery *err_rec = &adapter->error_recovery;
3959
3960         if (!be_err_recovery_workq)
3961                 return;
3962
3963         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3964                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3965                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3966         }
3967 }
3968
3969 /* VxLAN offload Notes:
3970  *
3971  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
3972  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
3973  * is expected to work across all types of IP tunnels once exported. Skyhawk
3974  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
3975  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
3976  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
3977  * those other tunnels are unexported on the fly through ndo_features_check().
3978  */
3979 static int be_vxlan_set_port(struct net_device *netdev, unsigned int table,
3980                              unsigned int entry, struct udp_tunnel_info *ti)
3981 {
3982         struct be_adapter *adapter = netdev_priv(netdev);
3983         struct device *dev = &adapter->pdev->dev;
3984         int status;
3985
3986         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3987                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3988         if (status) {
3989                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3990                 return status;
3991         }
3992         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3993
3994         status = be_cmd_set_vxlan_port(adapter, ti->port);
3995         if (status) {
3996                 dev_warn(dev, "Failed to add VxLAN port\n");
3997                 return status;
3998         }
3999         adapter->vxlan_port = ti->port;
4000
4001         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4002                                    NETIF_F_TSO | NETIF_F_TSO6 |
4003                                    NETIF_F_GSO_UDP_TUNNEL;
4004
4005         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4006                  be16_to_cpu(ti->port));
4007         return 0;
4008 }
4009
4010 static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
4011                                unsigned int entry, struct udp_tunnel_info *ti)
4012 {
4013         struct be_adapter *adapter = netdev_priv(netdev);
4014
4015         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4016                 be_cmd_manage_iface(adapter, adapter->if_handle,
4017                                     OP_CONVERT_TUNNEL_TO_NORMAL);
4018
4019         if (adapter->vxlan_port)
4020                 be_cmd_set_vxlan_port(adapter, 0);
4021
4022         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4023         adapter->vxlan_port = 0;
4024
4025         netdev->hw_enc_features = 0;
4026         return 0;
4027 }
4028
4029 static const struct udp_tunnel_nic_info be_udp_tunnels = {
4030         .set_port       = be_vxlan_set_port,
4031         .unset_port     = be_vxlan_unset_port,
4032         .flags          = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
4033                           UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
4034         .tables         = {
4035                 { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
4036         },
4037 };
4038
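     /* Compute the per-VF share of queues, unicast MACs, VLANs, ifaces and MCC
      * queues that is requested from the FW when provisioning SR-IOV.
      */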
4039 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4040                                 struct be_resources *vft_res)
4041 {
4042         struct be_resources res = adapter->pool_res;
4043         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4044         struct be_resources res_mod = {0};
4045         u16 num_vf_qs = 1;
4046
4047         /* Distribute the queue resources among the PF and its VFs */
4048         if (num_vfs) {
4049                 /* Divide the rx queues evenly among the VFs and the PF, capped
4050                  * at VF-EQ-count. Any remainder queues belong to the PF.
4051                  */
4052                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4053                                 res.max_rss_qs / (num_vfs + 1));
4054
4055                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4056                  * RSS Tables per port. Provide RSS on VFs only if the number of
4057                  * VFs requested is less than its PF Pool's RSS Tables limit.
4058                  */
4059                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4060                         num_vf_qs = 1;
4061         }
4062
4063         /* GET_PROFILE_CONFIG returns a resource struct whose fields are set to
4064          * all '1's for the resources that are modifiable via SET_PROFILE_CONFIG cmd.
4065          */
4066         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4067                                   RESOURCE_MODIFIABLE, 0);
4068
4069         /* If RSS IFACE capability flags are modifiable for a VF, set the
4070          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4071          * more than 1 RSSQ is available for a VF.
4072          * Otherwise, provision only 1 queue pair for VF.
4073          */
4074         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4075                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4076                 if (num_vf_qs > 1) {
4077                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4078                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4079                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4080                 } else {
4081                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4082                                              BE_IF_FLAGS_DEFQ_RSS);
4083                 }
4084         } else {
4085                 num_vf_qs = 1;
4086         }
4087
4088         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4089                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4090                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4091         }
4092
4093         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4094         vft_res->max_rx_qs = num_vf_qs;
4095         vft_res->max_rss_qs = num_vf_qs;
4096         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4097         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4098
4099         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4100          * among the PF and its VFs, if the fields are changeable
4101          */
4102         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4103                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4104
4105         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4106                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4107
4108         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4109                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4110
4111         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4112                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4113 }
4114
4115 static void be_if_destroy(struct be_adapter *adapter)
4116 {
4117         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4118
4119         kfree(adapter->pmac_id);
4120         adapter->pmac_id = NULL;
4121
4122         kfree(adapter->mc_list);
4123         adapter->mc_list = NULL;
4124
4125         kfree(adapter->uc_list);
4126         adapter->uc_list = NULL;
4127 }
4128
4129 static int be_clear(struct be_adapter *adapter)
4130 {
4131         struct pci_dev *pdev = adapter->pdev;
4132         struct  be_resources vft_res = {0};
4133
4134         be_cancel_worker(adapter);
4135
4136         flush_workqueue(be_wq);
4137
4138         if (sriov_enabled(adapter))
4139                 be_vf_clear(adapter);
4140
4141         /* Re-configure FW to distribute resources evenly across max-supported
4142          * number of VFs, only when VFs are not already enabled.
4143          */
4144         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4145             !pci_vfs_assigned(pdev)) {
4146                 be_calculate_vf_res(adapter,
4147                                     pci_sriov_get_totalvfs(pdev),
4148                                     &vft_res);
4149                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4150                                         pci_sriov_get_totalvfs(pdev),
4151                                         &vft_res);
4152         }
4153
4154         be_vxlan_unset_port(adapter->netdev, 0, 0, NULL);
4155
4156         be_if_destroy(adapter);
4157
4158         be_clear_queues(adapter);
4159
4160         be_msix_disable(adapter);
4161         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4162         return 0;
4163 }
4164
4165 static int be_vfs_if_create(struct be_adapter *adapter)
4166 {
4167         struct be_resources res = {0};
4168         u32 cap_flags, en_flags, vf;
4169         struct be_vf_cfg *vf_cfg;
4170         int status;
4171
4172         /* If a FW profile exists, then cap_flags are updated */
4173         cap_flags = BE_VF_IF_EN_FLAGS;
4174
4175         for_all_vfs(adapter, vf_cfg, vf) {
4176                 if (!BE3_chip(adapter)) {
4177                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4178                                                            ACTIVE_PROFILE_TYPE,
4179                                                            RESOURCE_LIMITS,
4180                                                            vf + 1);
4181                         if (!status) {
4182                                 cap_flags = res.if_cap_flags;
4183                                 /* Prevent VFs from enabling VLAN promiscuous
4184                                  * mode
4185                                  */
4186                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4187                         }
4188                 }
4189
4190                 /* PF should enable IF flags during proxy if_create call */
4191                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4192                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4193                                           &vf_cfg->if_handle, vf + 1);
4194                 if (status)
4195                         return status;
4196         }
4197
4198         return 0;
4199 }
4200
4201 static int be_vf_setup_init(struct be_adapter *adapter)
4202 {
4203         struct be_vf_cfg *vf_cfg;
4204         int vf;
4205
4206         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4207                                   GFP_KERNEL);
4208         if (!adapter->vf_cfg)
4209                 return -ENOMEM;
4210
4211         for_all_vfs(adapter, vf_cfg, vf) {
4212                 vf_cfg->if_handle = -1;
4213                 vf_cfg->pmac_id = -1;
4214         }
4215         return 0;
4216 }
4217
4218 static int be_vf_setup(struct be_adapter *adapter)
4219 {
4220         struct device *dev = &adapter->pdev->dev;
4221         struct be_vf_cfg *vf_cfg;
4222         int status, old_vfs, vf;
4223         bool spoofchk;
4224
4225         old_vfs = pci_num_vf(adapter->pdev);
4226
4227         status = be_vf_setup_init(adapter);
4228         if (status)
4229                 goto err;
4230
4231         if (old_vfs) {
4232                 for_all_vfs(adapter, vf_cfg, vf) {
4233                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4234                         if (status)
4235                                 goto err;
4236                 }
4237
4238                 status = be_vfs_mac_query(adapter);
4239                 if (status)
4240                         goto err;
4241         } else {
4242                 status = be_vfs_if_create(adapter);
4243                 if (status)
4244                         goto err;
4245
4246                 status = be_vf_eth_addr_config(adapter);
4247                 if (status)
4248                         goto err;
4249         }
4250
4251         for_all_vfs(adapter, vf_cfg, vf) {
4252                 /* Allow VFs to program MAC/VLAN filters */
4253                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4254                                                   vf + 1);
4255                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4256                         status = be_cmd_set_fn_privileges(adapter,
4257                                                           vf_cfg->privileges |
4258                                                           BE_PRIV_FILTMGMT,
4259                                                           vf + 1);
4260                         if (!status) {
4261                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4262                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4263                                          vf);
4264                         }
4265                 }
4266
4267                 /* Allow full available bandwidth */
4268                 if (!old_vfs)
4269                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4270
4271                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4272                                                vf_cfg->if_handle, NULL,
4273                                                &spoofchk);
4274                 if (!status)
4275                         vf_cfg->spoofchk = spoofchk;
4276
4277                 if (!old_vfs) {
4278                         be_cmd_enable_vf(adapter, vf + 1);
4279                         be_cmd_set_logical_link_config(adapter,
4280                                                        IFLA_VF_LINK_STATE_AUTO,
4281                                                        vf + 1);
4282                 }
4283         }
4284
4285         if (!old_vfs) {
4286                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4287                 if (status) {
4288                         dev_err(dev, "SRIOV enable failed\n");
4289                         adapter->num_vfs = 0;
4290                         goto err;
4291                 }
4292         }
4293
4294         if (BE3_chip(adapter)) {
4295                 /* On BE3, enable VEB only when SRIOV is enabled */
4296                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4297                                                adapter->if_handle,
4298                                                PORT_FWD_TYPE_VEB, 0);
4299                 if (status)
4300                         goto err;
4301         }
4302
4303         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4304         return 0;
4305 err:
4306         dev_err(dev, "VF setup failed\n");
4307         be_vf_clear(adapter);
4308         return status;
4309 }
4310
4311 /* Converting function_mode bits on BE3 to SH mc_type enums */
4312
4313 static u8 be_convert_mc_type(u32 function_mode)
4314 {
4315         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4316                 return vNIC1;
4317         else if (function_mode & QNQ_MODE)
4318                 return FLEX10;
4319         else if (function_mode & VNIC_MODE)
4320                 return vNIC2;
4321         else if (function_mode & UMC_ENABLED)
4322                 return UMC;
4323         else
4324                 return MC_NONE;
4325 }
4326
4327 /* On BE2/BE3, the FW does not report the supported limits */
4328 static void BEx_get_resources(struct be_adapter *adapter,
4329                               struct be_resources *res)
4330 {
4331         bool use_sriov = adapter->num_vfs ? 1 : 0;
4332
4333         if (be_physfn(adapter))
4334                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4335         else
4336                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4337
4338         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4339
4340         if (be_is_mc(adapter)) {
4341                 /* Assuming that there are 4 channels per port
4342                  * when multi-channel is enabled
4343                  */
4344                 if (be_is_qnq_mode(adapter))
4345                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4346                 else
4347                         /* In a non-qnq multichannel mode, the pvid
4348                          * takes up one vlan entry
4349                          */
4350                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4351         } else {
4352                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4353         }
4354
4355         res->max_mcast_mac = BE_MAX_MC;
4356
4357         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4358          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4359          *    *only* if it is RSS-capable.
4360          */
4361         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4362             be_virtfn(adapter) ||
4363             (be_is_mc(adapter) &&
4364              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4365                 res->max_tx_qs = 1;
4366         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4367                 struct be_resources super_nic_res = {0};
4368
4369                 /* On a SuperNIC profile, the driver needs to use the
4370                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4371                  */
4372                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4373                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4374                                           0);
4375                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4376                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4377         } else {
4378                 res->max_tx_qs = BE3_MAX_TX_QS;
4379         }
4380
4381         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4382             !use_sriov && be_physfn(adapter))
4383                 res->max_rss_qs = (adapter->be3_native) ?
4384                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4385         res->max_rx_qs = res->max_rss_qs + 1;
4386
4387         if (be_physfn(adapter))
4388                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4389                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4390         else
4391                 res->max_evt_qs = 1;
4392
4393         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4394         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4395         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4396                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4397 }
4398
4399 static void be_setup_init(struct be_adapter *adapter)
4400 {
4401         adapter->vlan_prio_bmap = 0xff;
4402         adapter->phy.link_speed = -1;
4403         adapter->if_handle = -1;
4404         adapter->be3_native = false;
4405         adapter->if_flags = 0;
4406         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4407         if (be_physfn(adapter))
4408                 adapter->cmd_privileges = MAX_PRIVILEGES;
4409         else
4410                 adapter->cmd_privileges = MIN_PRIVILEGES;
4411 }
4412
4413 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4414  * However, this HW limitation is not exposed to the host via any SLI cmd.
4415  * As a result, in the case of SRIOV, and in particular in multi-partition configs,
4416  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4417  * for distribution between the VFs. This self-imposed limit will determine the
4418  * number of VFs for which RSS can be enabled.
4419  */
4420 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4421 {
4422         struct be_port_resources port_res = {0};
4423         u8 rss_tables_on_port;
4424         u16 max_vfs = be_max_vfs(adapter);
4425
4426         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4427                                   RESOURCE_LIMITS, 0);
4428
4429         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4430
4431         /* Each PF Pool's RSS Tables limit =
4432          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4433          */
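	/* Worked example with hypothetical numbers: if MAX_PORT_RSS_TABLES were 15,
	 * port_res.nic_pfs 3 and port_res.max_vfs 64, then a PF pool allowing
	 * 32 VFs would be granted 32 * (15 - 3) / 64 = 6 RSS Tables.
	 */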
4434         adapter->pool_res.max_rss_tables =
4435                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4436 }
4437
4438 static int be_get_sriov_config(struct be_adapter *adapter)
4439 {
4440         struct be_resources res = {0};
4441         int max_vfs, old_vfs;
4442
4443         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4444                                   RESOURCE_LIMITS, 0);
4445
4446         /* Some old versions of BE3 FW don't report max_vfs value */
4447         if (BE3_chip(adapter) && !res.max_vfs) {
4448                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4449                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4450         }
4451
4452         adapter->pool_res = res;
4453
4454         /* If during previous unload of the driver, the VFs were not disabled,
4455          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4456          * Instead use the TotalVFs value stored in the pci-dev struct.
4457          */
4458         old_vfs = pci_num_vf(adapter->pdev);
4459         if (old_vfs) {
4460                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4461                          old_vfs);
4462
4463                 adapter->pool_res.max_vfs =
4464                         pci_sriov_get_totalvfs(adapter->pdev);
4465                 adapter->num_vfs = old_vfs;
4466         }
4467
4468         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4469                 be_calculate_pf_pool_rss_tables(adapter);
4470                 dev_info(&adapter->pdev->dev,
4471                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4472                          be_max_pf_pool_rss_tables(adapter));
4473         }
4474         return 0;
4475 }
4476
4477 static void be_alloc_sriov_res(struct be_adapter *adapter)
4478 {
4479         int old_vfs = pci_num_vf(adapter->pdev);
4480         struct  be_resources vft_res = {0};
4481         int status;
4482
4483         be_get_sriov_config(adapter);
4484
4485         if (!old_vfs)
4486                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4487
4488         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4489          * resources are given to the PF during driver load, if there are no
4490          * old VFs. This facility is not available in BE3 FW.
4491          * Also, this is done by the FW on the Lancer chip.
4492          */
4493         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4494                 be_calculate_vf_res(adapter, 0, &vft_res);
4495                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4496                                                  &vft_res);
4497                 if (status)
4498                         dev_err(&adapter->pdev->dev,
4499                                 "Failed to optimize SRIOV resources\n");
4500         }
4501 }
4502
4503 static int be_get_resources(struct be_adapter *adapter)
4504 {
4505         struct device *dev = &adapter->pdev->dev;
4506         struct be_resources res = {0};
4507         int status;
4508
4509         /* For Lancer, SH etc. read per-function resource limits from FW.
4510          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4511          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4512          */
4513         if (BEx_chip(adapter)) {
4514                 BEx_get_resources(adapter, &res);
4515         } else {
4516                 status = be_cmd_get_func_config(adapter, &res);
4517                 if (status)
4518                         return status;
4519
4520                 /* If a default RXQ must be created, we'll use up one RSSQ */
4521                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4522                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4523                         res.max_rss_qs -= 1;
4524         }
4525
4526         /* If RoCE is supported, stash away half the EQs for RoCE */
4527         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4528                                 res.max_evt_qs / 2 : res.max_evt_qs;
4529         adapter->res = res;
4530
4531         /* If FW supports RSS default queue, then skip creating non-RSS
4532          * queue for non-IP traffic.
4533          */
4534         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4535                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4536
4537         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4538                  be_max_txqs(adapter), be_max_rxqs(adapter),
4539                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4540                  be_max_vfs(adapter));
4541         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4542                  be_max_uc(adapter), be_max_mc(adapter),
4543                  be_max_vlans(adapter));
4544
4545         /* Ensure RX and TX queues are created in pairs at init time */
4546         adapter->cfg_num_rx_irqs =
4547                                 min_t(u16, netif_get_num_default_rss_queues(),
4548                                       be_max_qp_irqs(adapter));
4549         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4550         return 0;
4551 }
4552
4553 static int be_get_config(struct be_adapter *adapter)
4554 {
4555         int status, level;
4556         u16 profile_id;
4557
4558         status = be_cmd_get_cntl_attributes(adapter);
4559         if (status)
4560                 return status;
4561
4562         status = be_cmd_query_fw_cfg(adapter);
4563         if (status)
4564                 return status;
4565
4566         if (!lancer_chip(adapter) && be_physfn(adapter))
4567                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4568
4569         if (BEx_chip(adapter)) {
4570                 level = be_cmd_get_fw_log_level(adapter);
4571                 adapter->msg_enable =
4572                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4573         }
4574
4575         be_cmd_get_acpi_wol_cap(adapter);
4576         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4577         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4578
4579         be_cmd_query_port_name(adapter);
4580
4581         if (be_physfn(adapter)) {
4582                 status = be_cmd_get_active_profile(adapter, &profile_id);
4583                 if (!status)
4584                         dev_info(&adapter->pdev->dev,
4585                                  "Using profile 0x%x\n", profile_id);
4586         }
4587
4588         return 0;
4589 }
4590
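/* If the netdev has no MAC address yet, read the permanent MAC from FW and
 * use it as both the current and permanent address.
 */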
4591 static int be_mac_setup(struct be_adapter *adapter)
4592 {
4593         u8 mac[ETH_ALEN];
4594         int status;
4595
4596         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4597                 status = be_cmd_get_perm_mac(adapter, mac);
4598                 if (status)
4599                         return status;
4600
4601                 eth_hw_addr_set(adapter->netdev, mac);
4602                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4603
4604                 /* Initial MAC for BE3 VFs is already programmed by PF */
4605                 if (BEx_chip(adapter) && be_virtfn(adapter))
4606                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4607         }
4608
4609         return 0;
4610 }
4611
4612 static void be_schedule_worker(struct be_adapter *adapter)
4613 {
4614         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4615         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4616 }
4617
4618 static void be_destroy_err_recovery_workq(void)
4619 {
4620         if (!be_err_recovery_workq)
4621                 return;
4622
4623         destroy_workqueue(be_err_recovery_workq);
4624         be_err_recovery_workq = NULL;
4625 }
4626
4627 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4628 {
4629         struct be_error_recovery *err_rec = &adapter->error_recovery;
4630
4631         if (!be_err_recovery_workq)
4632                 return;
4633
4634         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4635                            msecs_to_jiffies(delay));
4636         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4637 }
4638
4639 static int be_setup_queues(struct be_adapter *adapter)
4640 {
4641         struct net_device *netdev = adapter->netdev;
4642         int status;
4643
4644         status = be_evt_queues_create(adapter);
4645         if (status)
4646                 goto err;
4647
4648         status = be_tx_qs_create(adapter);
4649         if (status)
4650                 goto err;
4651
4652         status = be_rx_cqs_create(adapter);
4653         if (status)
4654                 goto err;
4655
4656         status = be_mcc_queues_create(adapter);
4657         if (status)
4658                 goto err;
4659
4660         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4661         if (status)
4662                 goto err;
4663
4664         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4665         if (status)
4666                 goto err;
4667
4668         return 0;
4669 err:
4670         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4671         return status;
4672 }
4673
4674 static int be_if_create(struct be_adapter *adapter)
4675 {
4676         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4677         u32 cap_flags = be_if_cap_flags(adapter);
4678
4679         /* alloc required memory for other filtering fields */
4680         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4681                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4682         if (!adapter->pmac_id)
4683                 return -ENOMEM;
4684
4685         adapter->mc_list = kcalloc(be_max_mc(adapter),
4686                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4687         if (!adapter->mc_list)
4688                 return -ENOMEM;
4689
4690         adapter->uc_list = kcalloc(be_max_uc(adapter),
4691                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4692         if (!adapter->uc_list)
4693                 return -ENOMEM;
4694
4695         if (adapter->cfg_num_rx_irqs == 1)
4696                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4697
4698         en_flags &= cap_flags;
4699         /* will enable all the needed filter flags in be_open() */
4700         return be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4701                                   &adapter->if_handle, 0);
4702 }
4703
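/* Quiesce the netdev, tear down the interface and all queues, then re-create
 * them and reopen the device if it was running; typically used when the queue
 * or MSI-x configuration changes at runtime.
 */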
4704 int be_update_queues(struct be_adapter *adapter)
4705 {
4706         struct net_device *netdev = adapter->netdev;
4707         int status;
4708
4709         if (netif_running(netdev)) {
4710                 /* be_tx_timeout() must not run concurrently with this
4711                  * function, synchronize with an already-running dev_watchdog
4712                  */
4713                 netif_tx_lock_bh(netdev);
4714                 /* device cannot transmit now, avoid dev_watchdog timeouts */
4715                 netif_carrier_off(netdev);
4716                 netif_tx_unlock_bh(netdev);
4717
4718                 be_close(netdev);
4719         }
4720
4721         be_cancel_worker(adapter);
4722
4723         /* If any vectors have been shared with RoCE we cannot re-program
4724          * the MSIx table.
4725          */
4726         if (!adapter->num_msix_roce_vec)
4727                 be_msix_disable(adapter);
4728
4729         be_clear_queues(adapter);
4730         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4731         if (status)
4732                 return status;
4733
4734         if (!msix_enabled(adapter)) {
4735                 status = be_msix_enable(adapter);
4736                 if (status)
4737                         return status;
4738         }
4739
4740         status = be_if_create(adapter);
4741         if (status)
4742                 return status;
4743
4744         status = be_setup_queues(adapter);
4745         if (status)
4746                 return status;
4747
4748         be_schedule_worker(adapter);
4749
4750         /* The IF was destroyed and re-created. We need to clear
4751          * all promiscuous flags valid for the destroyed IF.
4752          * Without this, promisc mode is not restored during
4753          * be_open() because the driver thinks that it is
4754          * already enabled in HW.
4755          */
4756         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4757
4758         if (netif_running(netdev))
4759                 status = be_open(netdev);
4760
4761         return status;
4762 }
4763
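/* Extract the major number from a FW version string; for example, a fw_ver
 * beginning with "4." yields 4. Returns 0 if no leading number is found.
 */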
4764 static inline int fw_major_num(const char *fw_ver)
4765 {
4766         int fw_major = 0, i;
4767
4768         i = sscanf(fw_ver, "%d.", &fw_major);
4769         if (i != 1)
4770                 return 0;
4771
4772         return fw_major;
4773 }
4774
4775 /* If it is error recovery, FLR the PF.
4776  * Else, if any VFs are already enabled, don't FLR the PF.
4777  */
4778 static bool be_reset_required(struct be_adapter *adapter)
4779 {
4780         if (be_error_recovering(adapter))
4781                 return true;
4782         else
4783                 return pci_num_vf(adapter->pdev) == 0;
4784 }
4785
4786 /* Wait for the FW to be ready and perform the required initialization */
4787 static int be_func_init(struct be_adapter *adapter)
4788 {
4789         int status;
4790
4791         status = be_fw_wait_ready(adapter);
4792         if (status)
4793                 return status;
4794
4795         /* FW is now ready; clear errors to allow cmds/doorbell */
4796         be_clear_error(adapter, BE_CLEAR_ALL);
4797
4798         if (be_reset_required(adapter)) {
4799                 status = be_cmd_reset_function(adapter);
4800                 if (status)
4801                         return status;
4802
4803                 /* Wait for interrupts to quiesce after an FLR */
4804                 msleep(100);
4805         }
4806
4807         /* Tell FW we're ready to fire cmds */
4808         status = be_cmd_fw_init(adapter);
4809         if (status)
4810                 return status;
4811
4812         /* Allow interrupts for other ULPs running on NIC function */
4813         be_intr_set(adapter, true);
4814
4815         return 0;
4816 }
4817
4818 static int be_setup(struct be_adapter *adapter)
4819 {
4820         struct device *dev = &adapter->pdev->dev;
4821         int status;
4822
4823         status = be_func_init(adapter);
4824         if (status)
4825                 return status;
4826
4827         be_setup_init(adapter);
4828
4829         if (!lancer_chip(adapter))
4830                 be_cmd_req_native_mode(adapter);
4831
4832         /* invoke this cmd first to get pf_num and vf_num which are needed
4833          * for issuing profile related cmds
4834          */
4835         if (!BEx_chip(adapter)) {
4836                 status = be_cmd_get_func_config(adapter, NULL);
4837                 if (status)
4838                         return status;
4839         }
4840
4841         status = be_get_config(adapter);
4842         if (status)
4843                 goto err;
4844
4845         if (!BE2_chip(adapter) && be_physfn(adapter))
4846                 be_alloc_sriov_res(adapter);
4847
4848         status = be_get_resources(adapter);
4849         if (status)
4850                 goto err;
4851
4852         status = be_msix_enable(adapter);
4853         if (status)
4854                 goto err;
4855
4856         /* will enable all the needed filter flags in be_open() */
4857         status = be_if_create(adapter);
4858         if (status)
4859                 goto err;
4860
4861         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4862         rtnl_lock();
4863         status = be_setup_queues(adapter);
4864         rtnl_unlock();
4865         if (status)
4866                 goto err;
4867
4868         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4869
4870         status = be_mac_setup(adapter);
4871         if (status)
4872                 goto err;
4873
4874         be_cmd_get_fw_ver(adapter);
4875         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4876
4877         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4878                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4879                         adapter->fw_ver);
4880                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4881         }
4882
4883         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4884                                          adapter->rx_fc);
4885         if (status)
4886                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4887                                         &adapter->rx_fc);
4888
4889         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4890                  adapter->tx_fc, adapter->rx_fc);
4891
4892         if (be_physfn(adapter))
4893                 be_cmd_set_logical_link_config(adapter,
4894                                                IFLA_VF_LINK_STATE_AUTO, 0);
4895
4896         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4897          * confusing any Linux bridge or OVS it may be connected to.
4898          * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4899          * when SRIOV is not enabled.
4900          */
4901         if (BE3_chip(adapter))
4902                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4903                                       PORT_FWD_TYPE_PASSTHRU, 0);
4904
4905         if (adapter->num_vfs)
4906                 be_vf_setup(adapter);
4907
4908         status = be_cmd_get_phy_info(adapter);
4909         if (!status && be_pause_supported(adapter))
4910                 adapter->phy.fc_autoneg = 1;
4911
4912         if (be_physfn(adapter) && !lancer_chip(adapter))
4913                 be_cmd_set_features(adapter);
4914
4915         be_schedule_worker(adapter);
4916         adapter->flags |= BE_FLAGS_SETUP_DONE;
4917         return 0;
4918 err:
4919         be_clear(adapter);
4920         return status;
4921 }
4922
4923 #ifdef CONFIG_NET_POLL_CONTROLLER
4924 static void be_netpoll(struct net_device *netdev)
4925 {
4926         struct be_adapter *adapter = netdev_priv(netdev);
4927         struct be_eq_obj *eqo;
4928         int i;
4929
4930         for_all_evt_queues(adapter, eqo, i) {
4931                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4932                 napi_schedule(&eqo->napi);
4933         }
4934 }
4935 #endif
4936
4937 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4938 {
4939         const struct firmware *fw;
4940         int status;
4941
4942         if (!netif_running(adapter->netdev)) {
4943                 dev_err(&adapter->pdev->dev,
4944                         "Firmware load not allowed (interface is down)\n");
4945                 return -ENETDOWN;
4946         }
4947
4948         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4949         if (status)
4950                 goto fw_exit;
4951
4952         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4953
4954         if (lancer_chip(adapter))
4955                 status = lancer_fw_download(adapter, fw);
4956         else
4957                 status = be_fw_download(adapter, fw);
4958
4959         if (!status)
4960                 be_cmd_get_fw_ver(adapter);
4961
4962 fw_exit:
4963         release_firmware(fw);
4964         return status;
4965 }
4966
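/* Switch the embedded bridge between VEB and VEPA forwarding modes. This is
 * typically driven from userspace via iproute2, e.g.
 * "bridge link set dev <ifname> hwmode vepa"; only valid when SR-IOV is enabled.
 */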
4967 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4968                                  u16 flags, struct netlink_ext_ack *extack)
4969 {
4970         struct be_adapter *adapter = netdev_priv(dev);
4971         struct nlattr *attr, *br_spec;
4972         int rem;
4973         int status = 0;
4974         u16 mode = 0;
4975
4976         if (!sriov_enabled(adapter))
4977                 return -EOPNOTSUPP;
4978
4979         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4980         if (!br_spec)
4981                 return -EINVAL;
4982
4983         nla_for_each_nested(attr, br_spec, rem) {
4984                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4985                         continue;
4986
4987                 if (nla_len(attr) < sizeof(mode))
4988                         return -EINVAL;
4989
4990                 mode = nla_get_u16(attr);
4991                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4992                         return -EOPNOTSUPP;
4993
4994                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4995                         return -EINVAL;
4996
4997                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4998                                                adapter->if_handle,
4999                                                mode == BRIDGE_MODE_VEPA ?
5000                                                PORT_FWD_TYPE_VEPA :
5001                                                PORT_FWD_TYPE_VEB, 0);
5002                 if (status)
5003                         goto err;
5004
5005                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
5006                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5007
5008                 return status;
5009         }
5010 err:
5011         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5012                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5013
5014         return status;
5015 }
5016
5017 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5018                                  struct net_device *dev, u32 filter_mask,
5019                                  int nlflags)
5020 {
5021         struct be_adapter *adapter = netdev_priv(dev);
5022         int status = 0;
5023         u8 hsw_mode;
5024
5025         /* BE and Lancer chips support VEB mode only */
5026         if (BEx_chip(adapter) || lancer_chip(adapter)) {
5027                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5028                 if (!pci_sriov_get_totalvfs(adapter->pdev))
5029                         return 0;
5030                 hsw_mode = PORT_FWD_TYPE_VEB;
5031         } else {
5032                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5033                                                adapter->if_handle, &hsw_mode,
5034                                                NULL);
5035                 if (status)
5036                         return 0;
5037
5038                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5039                         return 0;
5040         }
5041
5042         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5043                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
5044                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5045                                        0, 0, nlflags, filter_mask, NULL);
5046 }
5047
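/* Allocate a deferred-work item bound to the given handler; uses GFP_ATOMIC
 * so it is safe to call from atomic contexts such as ndo_set_rx_mode.
 */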
5048 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5049                                          void (*func)(struct work_struct *))
5050 {
5051         struct be_cmd_work *work;
5052
5053         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5054         if (!work) {
5055                 dev_err(&adapter->pdev->dev,
5056                         "be_work memory allocation failed\n");
5057                 return NULL;
5058         }
5059
5060         INIT_WORK(&work->work, func);
5061         work->adapter = adapter;
5062         return work;
5063 }
5064
5065 static netdev_features_t be_features_check(struct sk_buff *skb,
5066                                            struct net_device *dev,
5067                                            netdev_features_t features)
5068 {
5069         struct be_adapter *adapter = netdev_priv(dev);
5070         u8 l4_hdr = 0;
5071
5072         if (skb_is_gso(skb)) {
5073                 /* IPv6 TSO requests with extension hdrs are a problem
5074                  * for Lancer and BE3 HW. Disable the TSO6 feature.
5075                  */
5076                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5077                         features &= ~NETIF_F_TSO6;
5078
5079                 /* Lancer cannot handle a packet with an MSS of less than 256.
5080                  * It also can't handle a TSO packet with a single segment.
5081                  * Disable GSO support in such cases.
5082                  */
5083                 if (lancer_chip(adapter) &&
5084                     (skb_shinfo(skb)->gso_size < 256 ||
5085                      skb_shinfo(skb)->gso_segs == 1))
5086                         features &= ~NETIF_F_GSO_MASK;
5087         }
5088
5089         /* The code below restricts offload features for some tunneled and
5090          * Q-in-Q packets.
5091          * Offload features for normal (non tunnel) packets are unchanged.
5092          */
5093         features = vlan_features_check(skb, features);
5094         if (!skb->encapsulation ||
5095             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5096                 return features;
5097
5098         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5099          * should disable tunnel offload features if it's not a VxLAN packet,
5100          * as tunnel offloads have been enabled only for VxLAN. This is done so
5101          * that other tunneled traffic, such as GRE, keeps working while VxLAN
5102          * offloads are configured on Skyhawk-R.
5103          */
5104         switch (vlan_get_protocol(skb)) {
5105         case htons(ETH_P_IP):
5106                 l4_hdr = ip_hdr(skb)->protocol;
5107                 break;
5108         case htons(ETH_P_IPV6):
5109                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5110                 break;
5111         default:
5112                 return features;
5113         }
5114
5115         if (l4_hdr != IPPROTO_UDP ||
5116             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5117             skb->inner_protocol != htons(ETH_P_TEB) ||
5118             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5119                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5120             !adapter->vxlan_port ||
5121             udp_hdr(skb)->dest != adapter->vxlan_port)
5122                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5123
5124         return features;
5125 }
5126
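/* Report a physical port id composed of the HBA port number followed by the
 * controller serial number words copied in reverse order.
 */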
5127 static int be_get_phys_port_id(struct net_device *dev,
5128                                struct netdev_phys_item_id *ppid)
5129 {
5130         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5131         struct be_adapter *adapter = netdev_priv(dev);
5132         u8 *id;
5133
5134         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5135                 return -ENOSPC;
5136
5137         ppid->id[0] = adapter->hba_port_num + 1;
5138         id = &ppid->id[1];
5139         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5140              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5141                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5142
5143         ppid->id_len = id_len;
5144
5145         return 0;
5146 }
5147
5148 static void be_set_rx_mode(struct net_device *dev)
5149 {
5150         struct be_adapter *adapter = netdev_priv(dev);
5151         struct be_cmd_work *work;
5152
5153         work = be_alloc_work(adapter, be_work_set_rx_mode);
5154         if (work)
5155                 queue_work(be_wq, &work->work);
5156 }
5157
5158 static const struct net_device_ops be_netdev_ops = {
5159         .ndo_open               = be_open,
5160         .ndo_stop               = be_close,
5161         .ndo_start_xmit         = be_xmit,
5162         .ndo_set_rx_mode        = be_set_rx_mode,
5163         .ndo_set_mac_address    = be_mac_addr_set,
5164         .ndo_get_stats64        = be_get_stats64,
5165         .ndo_validate_addr      = eth_validate_addr,
5166         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5167         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5168         .ndo_set_vf_mac         = be_set_vf_mac,
5169         .ndo_set_vf_vlan        = be_set_vf_vlan,
5170         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5171         .ndo_get_vf_config      = be_get_vf_config,
5172         .ndo_set_vf_link_state  = be_set_vf_link_state,
5173         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5174         .ndo_tx_timeout         = be_tx_timeout,
5175 #ifdef CONFIG_NET_POLL_CONTROLLER
5176         .ndo_poll_controller    = be_netpoll,
5177 #endif
5178         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5179         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5180         .ndo_features_check     = be_features_check,
5181         .ndo_get_phys_port_id   = be_get_phys_port_id,
5182 };
5183
5184 static void be_netdev_init(struct net_device *netdev)
5185 {
5186         struct be_adapter *adapter = netdev_priv(netdev);
5187
5188         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5189                 NETIF_F_GSO_UDP_TUNNEL |
5190                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5191                 NETIF_F_HW_VLAN_CTAG_TX;
5192         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5193                 netdev->hw_features |= NETIF_F_RXHASH;
5194
5195         netdev->features |= netdev->hw_features |
5196                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER |
5197                 NETIF_F_HIGHDMA;
5198
5199         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5200                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5201
5202         netdev->priv_flags |= IFF_UNICAST_FLT;
5203
5204         netdev->flags |= IFF_MULTICAST;
5205
5206         netif_set_tso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5207
5208         netdev->netdev_ops = &be_netdev_ops;
5209
5210         netdev->ethtool_ops = &be_ethtool_ops;
5211
5212         if (!lancer_chip(adapter) && !BEx_chip(adapter) && !be_is_mc(adapter))
5213                 netdev->udp_tunnel_nic_info = &be_udp_tunnels;
5214
5215         /* MTU range: 256 - 9000 */
5216         netdev->min_mtu = BE_MIN_MTU;
5217         netdev->max_mtu = BE_MAX_MTU;
5218 }
5219
5220 static void be_cleanup(struct be_adapter *adapter)
5221 {
5222         struct net_device *netdev = adapter->netdev;
5223
5224         rtnl_lock();
5225         netif_device_detach(netdev);
5226         if (netif_running(netdev))
5227                 be_close(netdev);
5228         rtnl_unlock();
5229
5230         be_clear(adapter);
5231 }
5232
5233 static int be_resume(struct be_adapter *adapter)
5234 {
5235         struct net_device *netdev = adapter->netdev;
5236         int status;
5237
5238         status = be_setup(adapter);
5239         if (status)
5240                 return status;
5241
5242         rtnl_lock();
5243         if (netif_running(netdev))
5244                 status = be_open(netdev);
5245         rtnl_unlock();
5246
5247         if (status)
5248                 return status;
5249
5250         netif_device_attach(netdev);
5251
5252         return 0;
5253 }
5254
5255 static void be_soft_reset(struct be_adapter *adapter)
5256 {
5257         u32 val;
5258
5259         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5260         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5261         val |= SLIPORT_SOFTRESET_SR_MASK;
5262         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5263 }
5264
5265 static bool be_err_is_recoverable(struct be_adapter *adapter)
5266 {
5267         struct be_error_recovery *err_rec = &adapter->error_recovery;
5268         unsigned long initial_idle_time =
5269                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5270         unsigned long recovery_interval =
5271                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5272         u16 ue_err_code;
5273         u32 val;
5274
5275         val = be_POST_stage_get(adapter);
5276         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5277                 return false;
5278         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5279         if (ue_err_code == 0)
5280                 return false;
5281
5282         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5283                 ue_err_code);
5284
5285         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5286                 dev_err(&adapter->pdev->dev,
5287                         "Cannot recover within %lu sec from driver load\n",
5288                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5289                 return false;
5290         }
5291
5292         if (err_rec->last_recovery_time && time_before_eq(
5293                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5294                 dev_err(&adapter->pdev->dev,
5295                         "Cannot recover within %lu sec from last recovery\n",
5296                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5297                 return false;
5298         }
5299
5300         if (ue_err_code == err_rec->last_err_code) {
5301                 dev_err(&adapter->pdev->dev,
5302                         "Cannot recover from a consecutive TPE error\n");
5303                 return false;
5304         }
5305
5306         err_rec->last_recovery_time = jiffies;
5307         err_rec->last_err_code = ue_err_code;
5308         return true;
5309 }
5310
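/* Step the TPE recovery state machine used on non-Lancer chips. Intermediate
 * states return -EAGAIN along with a resched_delay for the error-detection
 * worker; only PF0 performs the chip soft reset, and 0 is returned once the
 * pre-poll wait is done and re-initialization can begin.
 */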
5311 static int be_tpe_recover(struct be_adapter *adapter)
5312 {
5313         struct be_error_recovery *err_rec = &adapter->error_recovery;
5314         int status = -EAGAIN;
5315         u32 val;
5316
5317         switch (err_rec->recovery_state) {
5318         case ERR_RECOVERY_ST_NONE:
5319                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5320                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5321                 break;
5322
5323         case ERR_RECOVERY_ST_DETECT:
5324                 val = be_POST_stage_get(adapter);
5325                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5326                     POST_STAGE_RECOVERABLE_ERR) {
5327                         dev_err(&adapter->pdev->dev,
5328                                 "Unrecoverable HW error detected: 0x%x\n", val);
5329                         status = -EINVAL;
5330                         err_rec->resched_delay = 0;
5331                         break;
5332                 }
5333
5334                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5335
5336                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5337                  * milliseconds before it checks for final error status in
5338                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5339                  * If they are, PF0 initiates a Soft Reset.
5340                  */
5341                 if (adapter->pf_num == 0) {
5342                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5343                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5344                                         ERR_RECOVERY_UE_DETECT_DURATION;
5345                         break;
5346                 }
5347
5348                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5349                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5350                                         ERR_RECOVERY_UE_DETECT_DURATION;
5351                 break;
5352
5353         case ERR_RECOVERY_ST_RESET:
5354                 if (!be_err_is_recoverable(adapter)) {
5355                         dev_err(&adapter->pdev->dev,
5356                                 "Failed to meet recovery criteria\n");
5357                         status = -EIO;
5358                         err_rec->resched_delay = 0;
5359                         break;
5360                 }
5361                 be_soft_reset(adapter);
5362                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5363                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5364                                         err_rec->ue_to_reset_time;
5365                 break;
5366
5367         case ERR_RECOVERY_ST_PRE_POLL:
5368                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5369                 err_rec->resched_delay = 0;
5370                 status = 0;                     /* done */
5371                 break;
5372
5373         default:
5374                 status = -EINVAL;
5375                 err_rec->resched_delay = 0;
5376                 break;
5377         }
5378
5379         return status;
5380 }
5381
5382 static int be_err_recover(struct be_adapter *adapter)
5383 {
5384         int status;
5385
5386         if (!lancer_chip(adapter)) {
5387                 if (!adapter->error_recovery.recovery_supported ||
5388                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5389                         return -EIO;
5390                 status = be_tpe_recover(adapter);
5391                 if (status)
5392                         goto err;
5393         }
5394
5395         /* Wait for adapter to reach quiescent state before
5396          * destroying queues
5397          */
5398         status = be_fw_wait_ready(adapter);
5399         if (status)
5400                 goto err;
5401
5402         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5403
5404         be_cleanup(adapter);
5405
5406         status = be_resume(adapter);
5407         if (status)
5408                 goto err;
5409
5410         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5411
5412 err:
5413         return status;
5414 }
5415
5416 static void be_err_detection_task(struct work_struct *work)
5417 {
5418         struct be_error_recovery *err_rec =
5419                         container_of(work, struct be_error_recovery,
5420                                      err_detection_work.work);
5421         struct be_adapter *adapter =
5422                         container_of(err_rec, struct be_adapter,
5423                                      error_recovery);
5424         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5425         struct device *dev = &adapter->pdev->dev;
5426         int recovery_status;
5427
5428         be_detect_error(adapter);
5429         if (!be_check_error(adapter, BE_ERROR_HW))
5430                 goto reschedule_task;
5431
5432         recovery_status = be_err_recover(adapter);
5433         if (!recovery_status) {
5434                 err_rec->recovery_retries = 0;
5435                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5436                 dev_info(dev, "Adapter recovery successful\n");
5437                 goto reschedule_task;
5438         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5439                 /* BEx/SH recovery state machine */
5440                 if (adapter->pf_num == 0 &&
5441                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5442                         dev_err(&adapter->pdev->dev,
5443                                 "Adapter recovery in progress\n");
5444                 resched_delay = err_rec->resched_delay;
5445                 goto reschedule_task;
5446         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5447                 /* For VFs, check every second whether the PF has
5448                  * allocated resources.
5449                  */
5450                 dev_err(dev, "Re-trying adapter recovery\n");
5451                 goto reschedule_task;
5452         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5453                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5454                 /* In case of another error during recovery, it takes 30 sec
5455                  * for the adapter to come out of error. Retry error recovery after
5456                  * this time interval.
5457                  */
5458                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5459                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5460                 goto reschedule_task;
5461         } else {
5462                 dev_err(dev, "Adapter recovery failed\n");
5463                 dev_err(dev, "Please reboot server to recover\n");
5464         }
5465
5466         return;
5467
5468 reschedule_task:
5469         be_schedule_err_detection(adapter, resched_delay);
5470 }
5471
5472 static void be_log_sfp_info(struct be_adapter *adapter)
5473 {
5474         int status;
5475
5476         status = be_cmd_query_sfp_info(adapter);
5477         if (!status) {
5478                 dev_err(&adapter->pdev->dev,
5479                         "Port %c: %s Vendor: %s part no: %s",
5480                         adapter->port_name,
5481                         be_misconfig_evt_port_state[adapter->phy_state],
5482                         adapter->phy.vendor_name,
5483                         adapter->phy.vendor_pn);
5484         }
5485         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5486 }
5487
5488 static void be_worker(struct work_struct *work)
5489 {
5490         struct be_adapter *adapter =
5491                 container_of(work, struct be_adapter, work.work);
5492         struct be_rx_obj *rxo;
5493         int i;
5494
5495         if (be_physfn(adapter) &&
5496             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5497                 be_cmd_get_die_temperature(adapter);
5498
5499         /* when interrupts are not yet enabled, just reap any pending
5500          * mcc completions
5501          */
5502         if (!netif_running(adapter->netdev)) {
5503                 local_bh_disable();
5504                 be_process_mcc(adapter);
5505                 local_bh_enable();
5506                 goto reschedule;
5507         }
5508
5509         if (!adapter->stats_cmd_sent) {
5510                 if (lancer_chip(adapter))
5511                         lancer_cmd_get_pport_stats(adapter,
5512                                                    &adapter->stats_cmd);
5513                 else
5514                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5515         }
5516
5517         for_all_rx_queues(adapter, rxo, i) {
5518                 /* Replenish RX-queues starved due to memory
5519                  * allocation failures.
5520                  */
5521                 if (rxo->rx_post_starved)
5522                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5523         }
5524
5525         /* EQ-delay update for Skyhawk is done while notifying EQ */
5526         if (!skyhawk_chip(adapter))
5527                 be_eqd_update(adapter, false);
5528
5529         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5530                 be_log_sfp_info(adapter);
5531
5532 reschedule:
5533         adapter->work_counter++;
5534         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5535 }
5536
5537 static void be_unmap_pci_bars(struct be_adapter *adapter)
5538 {
5539         if (adapter->csr)
5540                 pci_iounmap(adapter->pdev, adapter->csr);
5541         if (adapter->db)
5542                 pci_iounmap(adapter->pdev, adapter->db);
5543         if (adapter->pcicfg && adapter->pcicfg_mapped)
5544                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5545 }
5546
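/* Doorbell BAR number: BAR 0 on Lancer and on VFs, BAR 4 otherwise. */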
5547 static int db_bar(struct be_adapter *adapter)
5548 {
5549         if (lancer_chip(adapter) || be_virtfn(adapter))
5550                 return 0;
5551         else
5552                 return 4;
5553 }
5554
5555 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5556 {
5557         if (skyhawk_chip(adapter)) {
5558                 adapter->roce_db.size = 4096;
5559                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5560                                                               db_bar(adapter));
5561                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5562                                                                db_bar(adapter));
5563         }
5564         return 0;
5565 }
5566
5567 static int be_map_pci_bars(struct be_adapter *adapter)
5568 {
5569         struct pci_dev *pdev = adapter->pdev;
5570         u8 __iomem *addr;
5571         u32 sli_intf;
5572
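	/* The SLI_INTF register identifies the ASIC family and whether this
	 * function is a VF; both determine which BARs must be mapped below.
	 */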
5573         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5574         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5575                                 SLI_INTF_FAMILY_SHIFT;
5576         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5577
5578         if (BEx_chip(adapter) && be_physfn(adapter)) {
5579                 adapter->csr = pci_iomap(pdev, 2, 0);
5580                 if (!adapter->csr)
5581                         return -ENOMEM;
5582         }
5583
5584         addr = pci_iomap(pdev, db_bar(adapter), 0);
5585         if (!addr)
5586                 goto pci_map_err;
5587         adapter->db = addr;
5588
5589         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5590                 if (be_physfn(adapter)) {
5591                         /* PCICFG is the 2nd BAR in BE2 */
5592                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5593                         if (!addr)
5594                                 goto pci_map_err;
5595                         adapter->pcicfg = addr;
5596                         adapter->pcicfg_mapped = true;
5597                 } else {
5598                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5599                         adapter->pcicfg_mapped = false;
5600                 }
5601         }
5602
5603         be_roce_map_pci_bars(adapter);
5604         return 0;
5605
5606 pci_map_err:
5607         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5608         be_unmap_pci_bars(adapter);
5609         return -ENOMEM;
5610 }
5611
5612 static void be_drv_cleanup(struct be_adapter *adapter)
5613 {
5614         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5615         struct device *dev = &adapter->pdev->dev;
5616
5617         if (mem->va)
5618                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5619
5620         mem = &adapter->rx_filter;
5621         if (mem->va)
5622                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5623
5624         mem = &adapter->stats_cmd;
5625         if (mem->va)
5626                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5627 }
5628
5629 /* Allocate and initialize various fields in be_adapter struct */
5630 static int be_drv_init(struct be_adapter *adapter)
5631 {
5632         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5633         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5634         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5635         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5636         struct device *dev = &adapter->pdev->dev;
5637         int status = 0;
5638
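	/* The mailbox used for FW cmds must be 16-byte aligned: over-allocate by
	 * 16 bytes and keep both the raw allocation (mbox_mem_alloced) and the
	 * aligned view (mbox_mem) so the buffer can be freed later.
	 */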
5639         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5640         mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5641                                                 &mbox_mem_alloc->dma,
5642                                                 GFP_KERNEL);
5643         if (!mbox_mem_alloc->va)
5644                 return -ENOMEM;
5645
5646         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5647         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5648         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5649
5650         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5651         rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5652                                            &rx_filter->dma, GFP_KERNEL);
5653         if (!rx_filter->va) {
5654                 status = -ENOMEM;
5655                 goto free_mbox;
5656         }
5657
5658         if (lancer_chip(adapter))
5659                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5660         else if (BE2_chip(adapter))
5661                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5662         else if (BE3_chip(adapter))
5663                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5664         else
5665                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5666         stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5667                                            &stats_cmd->dma, GFP_KERNEL);
5668         if (!stats_cmd->va) {
5669                 status = -ENOMEM;
5670                 goto free_rx_filter;
5671         }
5672
5673         mutex_init(&adapter->mbox_lock);
5674         mutex_init(&adapter->mcc_lock);
5675         mutex_init(&adapter->rx_filter_lock);
5676         spin_lock_init(&adapter->mcc_cq_lock);
5677         init_completion(&adapter->et_cmd_compl);
5678
5679         pci_save_state(adapter->pdev);
5680
5681         INIT_DELAYED_WORK(&adapter->work, be_worker);
5682
5683         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5684         adapter->error_recovery.resched_delay = 0;
5685         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5686                           be_err_detection_task);
5687
5688         adapter->rx_fc = true;
5689         adapter->tx_fc = true;
5690
5691         /* Must be a power of 2 or else MODULO will BUG_ON */
5692         adapter->be_get_temp_freq = 64;
5693
5694         return 0;
5695
5696 free_rx_filter:
5697         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5698 free_mbox:
5699         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5700                           mbox_mem_alloc->dma);
5701         return status;
5702 }
5703
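     /* PCI remove: detach the RoCE device, stop error detection, unregister
      * the netdev and tear down queues/FW state.  The function is reset only
      * if no VFs are still assigned to guests; finally release all PCI
      * resources and free the netdev.
      */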
5704 static void be_remove(struct pci_dev *pdev)
5705 {
5706         struct be_adapter *adapter = pci_get_drvdata(pdev);
5707
5708         if (!adapter)
5709                 return;
5710
5711         be_roce_dev_remove(adapter);
5712         be_intr_set(adapter, false);
5713
5714         be_cancel_err_detection(adapter);
5715
5716         unregister_netdev(adapter->netdev);
5717
5718         be_clear(adapter);
5719
5720         if (!pci_vfs_assigned(adapter->pdev))
5721                 be_cmd_reset_function(adapter);
5722
5723         /* tell fw we're done with firing cmds */
5724         be_cmd_fw_clean(adapter);
5725
5726         be_unmap_pci_bars(adapter);
5727         be_drv_cleanup(adapter);
5728
5729         pci_disable_pcie_error_reporting(pdev);
5730
5731         pci_release_regions(pdev);
5732         pci_disable_device(pdev);
5733
5734         free_netdev(adapter->netdev);
5735 }
5736
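     /* hwmon 'temp1_input' show callback: report the last sampled on-die
      * temperature in millidegrees Celsius, or -EIO if no valid sample is
      * available.
      */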
5737 static ssize_t be_hwmon_show_temp(struct device *dev,
5738                                   struct device_attribute *dev_attr,
5739                                   char *buf)
5740 {
5741         struct be_adapter *adapter = dev_get_drvdata(dev);
5742
5743         /* Unit: millidegree Celsius */
5744         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5745                 return -EIO;
5746         else
5747                 return sprintf(buf, "%u\n",
5748                                adapter->hwmon_info.be_on_die_temp * 1000);
5749 }
5750
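     /* Read-only hwmon attribute exposing the on-die temperature. */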
5751 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5752                           be_hwmon_show_temp, NULL, 1);
5753
5754 static struct attribute *be_hwmon_attrs[] = {
5755         &sensor_dev_attr_temp1_input.dev_attr.attr,
5756         NULL
5757 };
5758
5759 ATTRIBUTE_GROUPS(be_hwmon);
5760
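     /* Return a printable name for the adapter's multi-channel mode; used in
      * the banner printed at the end of be_probe().
      */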
5761 static char *mc_name(struct be_adapter *adapter)
5762 {
5763         char *str = ""; /* default */
5764
5765         switch (adapter->mc_type) {
5766         case UMC:
5767                 str = "UMC";
5768                 break;
5769         case FLEX10:
5770                 str = "FLEX10";
5771                 break;
5772         case vNIC1:
5773                 str = "vNIC-1";
5774                 break;
5775         case nPAR:
5776                 str = "nPAR";
5777                 break;
5778         case UFP:
5779                 str = "UFP";
5780                 break;
5781         case vNIC2:
5782                 str = "vNIC-2";
5783                 break;
5784         default:
5785                 str = "";
5786         }
5787
5788         return str;
5789 }
5790
5791 static inline char *func_name(struct be_adapter *adapter)
5792 {
5793         return be_physfn(adapter) ? "PF" : "VF";
5794 }
5795
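     /* Map the PCI device ID to the adapter model name used in log messages. */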
5796 static inline char *nic_name(struct pci_dev *pdev)
5797 {
5798         switch (pdev->device) {
5799         case OC_DEVICE_ID1:
5800                 return OC_NAME;
5801         case OC_DEVICE_ID2:
5802                 return OC_NAME_BE;
5803         case OC_DEVICE_ID3:
5804         case OC_DEVICE_ID4:
5805                 return OC_NAME_LANCER;
5806         case BE_DEVICE_ID2:
5807                 return BE3_NAME;
5808         case OC_DEVICE_ID5:
5809         case OC_DEVICE_ID6:
5810                 return OC_NAME_SH;
5811         default:
5812                 return BE_NAME;
5813         }
5814 }
5815
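     /* PCI probe: enable the device, map BARs, allocate the driver's DMA
      * buffers, bring up the function (be_setup), register the netdev, attach
      * the RoCE device, start error detection and, on PFs with hwmon support
      * enabled, register the on-die temperature sensor.
      */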
5816 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5817 {
5818         struct be_adapter *adapter;
5819         struct net_device *netdev;
5820         int status = 0;
5821
5822         status = pci_enable_device(pdev);
5823         if (status)
5824                 goto do_none;
5825
5826         status = pci_request_regions(pdev, DRV_NAME);
5827         if (status)
5828                 goto disable_dev;
5829         pci_set_master(pdev);
5830
5831         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5832         if (!netdev) {
5833                 status = -ENOMEM;
5834                 goto rel_reg;
5835         }
5836         adapter = netdev_priv(netdev);
5837         adapter->pdev = pdev;
5838         pci_set_drvdata(pdev, adapter);
5839         adapter->netdev = netdev;
5840         SET_NETDEV_DEV(netdev, &pdev->dev);
5841
5842         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5843         if (status) {
5844                 dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5845                 goto free_netdev;
5846         }
5847
5848         status = pci_enable_pcie_error_reporting(pdev);
5849         if (!status)
5850                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5851
5852         status = be_map_pci_bars(adapter);
5853         if (status)
5854                 goto free_netdev;
5855
5856         status = be_drv_init(adapter);
5857         if (status)
5858                 goto unmap_bars;
5859
5860         status = be_setup(adapter);
5861         if (status)
5862                 goto drv_cleanup;
5863
5864         be_netdev_init(netdev);
5865         status = register_netdev(netdev);
5866         if (status != 0)
5867                 goto unsetup;
5868
5869         be_roce_dev_add(adapter);
5870
5871         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5872         adapter->error_recovery.probe_time = jiffies;
5873
5874         /* On-die temperature is not supported for VFs. */
5875         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5876                 adapter->hwmon_info.hwmon_dev =
5877                         devm_hwmon_device_register_with_groups(&pdev->dev,
5878                                                                DRV_NAME,
5879                                                                adapter,
5880                                                                be_hwmon_groups);
5881                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5882         }
5883
5884         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5885                  func_name(adapter), mc_name(adapter), adapter->port_name);
5886
5887         return 0;
5888
5889 unsetup:
5890         be_clear(adapter);
5891 drv_cleanup:
5892         be_drv_cleanup(adapter);
5893 unmap_bars:
5894         be_unmap_pci_bars(adapter);
5895 free_netdev:
5896         pci_disable_pcie_error_reporting(pdev);
5897         free_netdev(netdev);
5898 rel_reg:
5899         pci_release_regions(pdev);
5900 disable_dev:
5901         pci_disable_device(pdev);
5902 do_none:
5903         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5904         return status;
5905 }
5906
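     /* PM suspend: mask interrupts, cancel error detection and quiesce the
      * device via be_cleanup() before power-down.
      */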
5907 static int __maybe_unused be_suspend(struct device *dev_d)
5908 {
5909         struct be_adapter *adapter = dev_get_drvdata(dev_d);
5910
5911         be_intr_set(adapter, false);
5912         be_cancel_err_detection(adapter);
5913
5914         be_cleanup(adapter);
5915
5916         return 0;
5917 }
5918
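     /* PM resume: re-initialize the function via be_resume() and restart the
      * error-detection work.
      */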
5919 static int __maybe_unused be_pci_resume(struct device *dev_d)
5920 {
5921         struct be_adapter *adapter = dev_get_drvdata(dev_d);
5922         int status = 0;
5923
5924         status = be_resume(adapter);
5925         if (status)
5926                 return status;
5927
5928         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5929
5930         return 0;
5931 }
5932
5933 /*
5934  * An FLR will stop BE from DMAing any data.
5935  */
5936 static void be_shutdown(struct pci_dev *pdev)
5937 {
5938         struct be_adapter *adapter = pci_get_drvdata(pdev);
5939
5940         if (!adapter)
5941                 return;
5942
5943         be_roce_dev_shutdown(adapter);
5944         cancel_delayed_work_sync(&adapter->work);
5945         be_cancel_err_detection(adapter);
5946
5947         netif_device_detach(adapter->netdev);
5948
5949         be_cmd_reset_function(adapter);
5950
5951         pci_disable_device(pdev);
5952 }
5953
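     /* AER/EEH error_detected callback: detach RoCE, flag the EEH error and
      * tear down the function, then disable the device and request a slot
      * reset (or disconnect on permanent failure).  Function 0 sleeps 30s so
      * that an in-progress FW flash dump can complete first.
      */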
5954 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5955                                             pci_channel_state_t state)
5956 {
5957         struct be_adapter *adapter = pci_get_drvdata(pdev);
5958
5959         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5960
5961         be_roce_dev_remove(adapter);
5962
5963         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5964                 be_set_error(adapter, BE_ERROR_EEH);
5965
5966                 be_cancel_err_detection(adapter);
5967
5968                 be_cleanup(adapter);
5969         }
5970
5971         if (state == pci_channel_io_perm_failure)
5972                 return PCI_ERS_RESULT_DISCONNECT;
5973
5974         pci_disable_device(pdev);
5975
5976         /* The error could cause the FW to trigger a flash debug dump.
5977          * Resetting the card while flash dump is in progress
5978          * can cause it not to recover; wait for it to finish.
5979          * Wait only for first function as it is needed only once per
5980          * adapter.
5981          */
5982         if (pdev->devfn == 0)
5983                 ssleep(30);
5984
5985         return PCI_ERS_RESULT_NEED_RESET;
5986 }
5987
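     /* AER/EEH slot_reset callback: re-enable the device, restore config
      * space, wait for FW readiness and clear the driver's error state.
      */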
5988 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5989 {
5990         struct be_adapter *adapter = pci_get_drvdata(pdev);
5991         int status;
5992
5993         dev_info(&adapter->pdev->dev, "EEH reset\n");
5994
5995         status = pci_enable_device(pdev);
5996         if (status)
5997                 return PCI_ERS_RESULT_DISCONNECT;
5998
5999         pci_set_master(pdev);
6000         pci_restore_state(pdev);
6001
6002         /* Check if card is ok and fw is ready */
6003         dev_info(&adapter->pdev->dev,
6004                  "Waiting for FW to be ready after EEH reset\n");
6005         status = be_fw_wait_ready(adapter);
6006         if (status)
6007                 return PCI_ERS_RESULT_DISCONNECT;
6008
6009         be_clear_error(adapter, BE_CLEAR_ALL);
6010         return PCI_ERS_RESULT_RECOVERED;
6011 }
6012
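     /* AER/EEH resume callback: bring the function back up, re-attach the
      * RoCE device and restart error detection.
      */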
6013 static void be_eeh_resume(struct pci_dev *pdev)
6014 {
6015         int status = 0;
6016         struct be_adapter *adapter = pci_get_drvdata(pdev);
6017
6018         dev_info(&adapter->pdev->dev, "EEH resume\n");
6019
6020         pci_save_state(pdev);
6021
6022         status = be_resume(adapter);
6023         if (status)
6024                 goto err;
6025
6026         be_roce_dev_add(adapter);
6027
6028         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6029         return;
6030 err:
6031         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6032 }
6033
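     /* sriov_configure (sysfs sriov_numvfs) handler: num_vfs == 0 disables
      * VFs (refused while any VF is assigned to a guest); a non-zero value
      * redistributes PF-pool resources on Skyhawk, re-queries resources,
      * updates the PF's queue counts under rtnl_lock and then creates the
      * VFs.  Returns the number of VFs enabled or a negative error code.
      */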
6034 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6035 {
6036         struct be_adapter *adapter = pci_get_drvdata(pdev);
6037         struct be_resources vft_res = {0};
6038         int status;
6039
6040         if (!num_vfs)
6041                 be_vf_clear(adapter);
6042
6043         adapter->num_vfs = num_vfs;
6044
6045         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6046                 dev_warn(&pdev->dev,
6047                          "Cannot disable VFs while they are assigned\n");
6048                 return -EBUSY;
6049         }
6050
6051         /* When the HW is in an SRIOV-capable configuration, the PF-pool
6052          * resources are distributed equally across the maximum number of VFs.
6053          * The user may request that only a subset of the max VFs be enabled.
6054          * Based on num_vfs, redistribute the resources across num_vfs so that
6055          * each VF gets a larger share of the resources.
6056          * This facility is not available in BE3 FW; on Lancer chips the FW
6057          * does the redistribution itself.
6058          */
6059         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6060                 be_calculate_vf_res(adapter, adapter->num_vfs,
6061                                     &vft_res);
6062                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6063                                                  adapter->num_vfs, &vft_res);
6064                 if (status)
6065                         dev_err(&pdev->dev,
6066                                 "Failed to optimize SR-IOV resources\n");
6067         }
6068
6069         status = be_get_resources(adapter);
6070         if (status)
6071                 return be_cmd_status(status);
6072
6073         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6074         rtnl_lock();
6075         status = be_update_queues(adapter);
6076         rtnl_unlock();
6077         if (status)
6078                 return be_cmd_status(status);
6079
6080         if (adapter->num_vfs)
6081                 status = be_vf_setup(adapter);
6082
6083         if (!status)
6084                 return adapter->num_vfs;
6085
6086         return 0;
6087 }
6088
6089 static const struct pci_error_handlers be_eeh_handlers = {
6090         .error_detected = be_eeh_err_detected,
6091         .slot_reset = be_eeh_reset,
6092         .resume = be_eeh_resume,
6093 };
6094
6095 static SIMPLE_DEV_PM_OPS(be_pci_pm_ops, be_suspend, be_pci_resume);
6096
6097 static struct pci_driver be_driver = {
6098         .name = DRV_NAME,
6099         .id_table = be_dev_ids,
6100         .probe = be_probe,
6101         .remove = be_remove,
6102         .driver.pm = &be_pci_pm_ops,
6103         .shutdown = be_shutdown,
6104         .sriov_configure = be_pci_sriov_configure,
6105         .err_handler = &be_eeh_handlers
6106 };
6107
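     /* Module init: validate the rx_frag_size parameter, warn about the
      * obsolete num_vfs parameter, create the shared workqueues and register
      * the PCI driver.
      */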
6108 static int __init be_init_module(void)
6109 {
6110         int status;
6111
6112         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6113             rx_frag_size != 2048) {
6114                 pr_warn(DRV_NAME " : Module param rx_frag_size must be 2048/4096/8192. Using 2048\n");
6117                 rx_frag_size = 2048;
6118         }
6119
6120         if (num_vfs > 0) {
6121                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6122                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6123         }
6124
6125         be_wq = create_singlethread_workqueue("be_wq");
6126         if (!be_wq) {
6127                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6128                 return -ENOMEM;
6129         }
6130
6131         be_err_recovery_workq =
6132                 create_singlethread_workqueue("be_err_recover");
6133         if (!be_err_recovery_workq)
6134                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6135
6136         status = pci_register_driver(&be_driver);
6137         if (status) {
6138                 destroy_workqueue(be_wq);
6139                 be_destroy_err_recovery_workq();
6140         }
6141         return status;
6142 }
6143 module_init(be_init_module);
6144
6145 static void __exit be_exit_module(void)
6146 {
6147         pci_unregister_driver(&be_driver);
6148
6149         be_destroy_err_recovery_workq();
6150
6151         if (be_wq)
6152                 destroy_workqueue(be_wq);
6153 }
6154 module_exit(be_exit_module);