1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005 - 2016 Broadcom
4  * All rights reserved.
5  *
6  * Contact Information:
7  * linux-drivers@emulex.com
8  *
9  * Emulex
10  * 3333 Susan Street
11  * Costa Mesa, CA 92626
12  */
13
14 #include <linux/prefetch.h>
15 #include <linux/module.h>
16 #include "be.h"
17 #include "be_cmds.h"
18 #include <asm/div64.h>
19 #include <linux/if_bridge.h>
20 #include <net/busy_poll.h>
21 #include <net/vxlan.h>
22
23 MODULE_DESCRIPTION(DRV_DESC);
24 MODULE_AUTHOR("Emulex Corporation");
25 MODULE_LICENSE("GPL");
26
27 /* num_vfs module param is obsolete.
28  * Use sysfs method to enable/disable VFs.
29  */
30 static unsigned int num_vfs;
31 module_param(num_vfs, uint, 0444);
32 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
33
34 static ushort rx_frag_size = 2048;
35 module_param(rx_frag_size, ushort, 0444);
36 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
37
38 /* Per-module error detection/recovery workq shared across all functions.
39  * Each function schedules its own work request on this shared workq.
40  */
41 static struct workqueue_struct *be_err_recovery_workq;
42
43 static const struct pci_device_id be_dev_ids[] = {
44 #ifdef CONFIG_BE2NET_BE2
45         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
46         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
47 #endif /* CONFIG_BE2NET_BE2 */
48 #ifdef CONFIG_BE2NET_BE3
49         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
50         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
51 #endif /* CONFIG_BE2NET_BE3 */
52 #ifdef CONFIG_BE2NET_LANCER
53         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
55 #endif /* CONFIG_BE2NET_LANCER */
56 #ifdef CONFIG_BE2NET_SKYHAWK
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
58         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
59 #endif /* CONFIG_BE2NET_SKYHAWK */
60         { 0 }
61 };
62 MODULE_DEVICE_TABLE(pci, be_dev_ids);
63
64 /* Workqueue used by all functions for deferring cmd calls to the adapter */
65 static struct workqueue_struct *be_wq;
66
67 /* UE Status Low CSR */
68 static const char * const ue_status_low_desc[] = {
69         "CEV",
70         "CTX",
71         "DBUF",
72         "ERX",
73         "Host",
74         "MPU",
75         "NDMA",
76         "PTC ",
77         "RDMA ",
78         "RXF ",
79         "RXIPS ",
80         "RXULP0 ",
81         "RXULP1 ",
82         "RXULP2 ",
83         "TIM ",
84         "TPOST ",
85         "TPRE ",
86         "TXIPS ",
87         "TXULP0 ",
88         "TXULP1 ",
89         "UC ",
90         "WDMA ",
91         "TXULP2 ",
92         "HOST1 ",
93         "P0_OB_LINK ",
94         "P1_OB_LINK ",
95         "HOST_GPIO ",
96         "MBOX ",
97         "ERX2 ",
98         "SPARE ",
99         "JTAG ",
100         "MPU_INTPEND "
101 };
102
103 /* UE Status High CSR */
104 static const char * const ue_status_hi_desc[] = {
105         "LPCMEMHOST",
106         "MGMT_MAC",
107         "PCS0ONLINE",
108         "MPU_IRAM",
109         "PCS1ONLINE",
110         "PCTL0",
111         "PCTL1",
112         "PMEM",
113         "RR",
114         "TXPB",
115         "RXPP",
116         "XAUI",
117         "TXP",
118         "ARM",
119         "IPC",
120         "HOST2",
121         "HOST3",
122         "HOST4",
123         "HOST5",
124         "HOST6",
125         "HOST7",
126         "ECRC",
127         "Poison TLP",
128         "NETC",
129         "PERIPH",
130         "LLTXULP",
131         "D2P",
132         "RCON",
133         "LDMA",
134         "LLTXP",
135         "LLTXPB",
136         "Unknown"
137 };
138
139 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
140                                  BE_IF_FLAGS_BROADCAST | \
141                                  BE_IF_FLAGS_MULTICAST | \
142                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
143
144 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
145 {
146         struct be_dma_mem *mem = &q->dma_mem;
147
148         if (mem->va) {
149                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
150                                   mem->dma);
151                 mem->va = NULL;
152         }
153 }
154
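/* Allocate DMA-coherent memory for a queue of 'len' entries of 'entry_size'
 * bytes each and record the geometry in the queue_info. Returns -ENOMEM on
 * allocation failure.
 */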
155 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
156                           u16 len, u16 entry_size)
157 {
158         struct be_dma_mem *mem = &q->dma_mem;
159
160         memset(q, 0, sizeof(*q));
161         q->len = len;
162         q->entry_size = entry_size;
163         mem->size = len * entry_size;
164         mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
165                                      &mem->dma, GFP_KERNEL);
166         if (!mem->va)
167                 return -ENOMEM;
168         return 0;
169 }
170
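/* Enable/disable host interrupts by toggling the HOSTINTR bit in the MEMBAR
 * interrupt control register in PCI config space. The register is written
 * only when the requested state differs from the current one.
 */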
171 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
172 {
173         u32 reg, enabled;
174
175         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
176                               &reg);
177         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
178
179         if (!enabled && enable)
180                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else if (enabled && !enable)
182                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
183         else
184                 return;
185
186         pci_write_config_dword(adapter->pdev,
187                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
188 }
189
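/* Enable/disable interrupts via a FW command (be_cmd_intr_set); if the
 * command fails, fall back to toggling the PCI config register directly.
 * No-op on Lancer and when an EEH error has been detected.
 */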
190 static void be_intr_set(struct be_adapter *adapter, bool enable)
191 {
192         int status = 0;
193
194         /* On Lancer, interrupts can't be controlled via this register */
195         if (lancer_chip(adapter))
196                 return;
197
198         if (be_check_error(adapter, BE_ERROR_EEH))
199                 return;
200
201         status = be_cmd_intr_set(adapter, enable);
202         if (status)
203                 be_reg_intr_set(adapter, enable);
204 }
205
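/* Ring the RX queue doorbell to notify the adapter of newly posted RX
 * buffers. The write is skipped if a HW error has been detected.
 */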
206 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
207 {
208         u32 val = 0;
209
210         if (be_check_error(adapter, BE_ERROR_HW))
211                 return;
212
213         val |= qid & DB_RQ_RING_ID_MASK;
214         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
215
216         wmb();
217         iowrite32(val, adapter->db + DB_RQ_OFFSET);
218 }
219
220 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
221                           u16 posted)
222 {
223         u32 val = 0;
224
225         if (be_check_error(adapter, BE_ERROR_HW))
226                 return;
227
228         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
229         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
230
231         wmb();
232         iowrite32(val, adapter->db + txo->db_offset);
233 }
234
235 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
236                          bool arm, bool clear_int, u16 num_popped,
237                          u32 eq_delay_mult_enc)
238 {
239         u32 val = 0;
240
241         val |= qid & DB_EQ_RING_ID_MASK;
242         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
243
244         if (be_check_error(adapter, BE_ERROR_HW))
245                 return;
246
247         if (arm)
248                 val |= 1 << DB_EQ_REARM_SHIFT;
249         if (clear_int)
250                 val |= 1 << DB_EQ_CLR_SHIFT;
251         val |= 1 << DB_EQ_EVNT_SHIFT;
252         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
253         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
254         iowrite32(val, adapter->db + DB_EQ_OFFSET);
255 }
256
257 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
258 {
259         u32 val = 0;
260
261         val |= qid & DB_CQ_RING_ID_MASK;
262         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
263                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
264
265         if (be_check_error(adapter, BE_ERROR_HW))
266                 return;
267
268         if (arm)
269                 val |= 1 << DB_CQ_REARM_SHIFT;
270         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
271         iowrite32(val, adapter->db + DB_CQ_OFFSET);
272 }
273
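/* Program 'mac' as the primary MAC (pmac_id[0]) on the interface. If the MAC
 * is already present in the uc-list, reuse its pmac_id instead of issuing
 * another pmac-add command.
 */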
274 static int be_dev_mac_add(struct be_adapter *adapter, const u8 *mac)
275 {
276         int i;
277
278         /* Check if mac has already been added as part of uc-list */
279         for (i = 0; i < adapter->uc_macs; i++) {
280                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
281                         /* mac already added, skip addition */
282                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
283                         return 0;
284                 }
285         }
286
287         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
288                                &adapter->pmac_id[0], 0);
289 }
290
291 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
292 {
293         int i;
294
295         /* Skip deletion if the programmed mac is
296          * being used in uc-list
297          */
298         for (i = 0; i < adapter->uc_macs; i++) {
299                 if (adapter->pmac_id[i + 1] == pmac_id)
300                         return;
301         }
302         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
303 }
304
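/* ndo_set_mac_address handler: validates the requested MAC, programs it on
 * the interface and confirms with the FW that it actually became the active
 * MAC before updating netdev->dev_addr.
 */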
305 static int be_mac_addr_set(struct net_device *netdev, void *p)
306 {
307         struct be_adapter *adapter = netdev_priv(netdev);
308         struct device *dev = &adapter->pdev->dev;
309         struct sockaddr *addr = p;
310         int status;
311         u8 mac[ETH_ALEN];
312         u32 old_pmac_id = adapter->pmac_id[0];
313
314         if (!is_valid_ether_addr(addr->sa_data))
315                 return -EADDRNOTAVAIL;
316
317         /* Proceed further only if the user-provided MAC is different
318          * from the active MAC
319          */
320         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
321                 return 0;
322
323         /* BE3 VFs without FILTMGMT privilege are not allowed to set their MAC
324          * address
325          */
326         if (BEx_chip(adapter) && be_virtfn(adapter) &&
327             !check_privilege(adapter, BE_PRIV_FILTMGMT))
328                 return -EPERM;
329
330         /* if device is not running, copy MAC to netdev->dev_addr */
331         if (!netif_running(netdev))
332                 goto done;
333
334         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
335          * privilege or if PF did not provision the new MAC address.
336          * On BE3, this cmd will always fail if the VF doesn't have the
337          * FILTMGMT privilege. This failure is OK, only if the PF programmed
338          * the MAC for the VF.
339          */
340         mutex_lock(&adapter->rx_filter_lock);
341         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
342         if (!status) {
343
344                 /* Delete the old programmed MAC. This call may fail if the
345                  * old MAC was already deleted by the PF driver.
346                  */
347                 if (adapter->pmac_id[0] != old_pmac_id)
348                         be_dev_mac_del(adapter, old_pmac_id);
349         }
350
351         mutex_unlock(&adapter->rx_filter_lock);
352         /* Decide whether the new MAC was successfully activated only after
353          * querying the FW
354          */
355         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
356                                        adapter->if_handle, true, 0);
357         if (status)
358                 goto err;
359
360         /* The MAC change did not happen, either due to lack of privilege
361          * or because the PF didn't pre-provision the new MAC.
362          */
363         if (!ether_addr_equal(addr->sa_data, mac)) {
364                 status = -EPERM;
365                 goto err;
366         }
367
368         /* Remember currently programmed MAC */
369         ether_addr_copy(adapter->dev_mac, addr->sa_data);
370 done:
371         eth_hw_addr_set(netdev, addr->sa_data);
372         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
373         return 0;
374 err:
375         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
376         return status;
377 }
378
379 /* BE2 supports only v0 cmd */
380 static void *hw_stats_from_cmd(struct be_adapter *adapter)
381 {
382         if (BE2_chip(adapter)) {
383                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
384
385                 return &cmd->hw_stats;
386         } else if (BE3_chip(adapter)) {
387                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
388
389                 return &cmd->hw_stats;
390         } else {
391                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
392
393                 return &cmd->hw_stats;
394         }
395 }
396
397 /* BE2 supports only v0 cmd */
398 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
399 {
400         if (BE2_chip(adapter)) {
401                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
402
403                 return &hw_stats->erx;
404         } else if (BE3_chip(adapter)) {
405                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
406
407                 return &hw_stats->erx;
408         } else {
409                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
410
411                 return &hw_stats->erx;
412         }
413 }
414
415 static void populate_be_v0_stats(struct be_adapter *adapter)
416 {
417         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
418         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
419         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
420         struct be_port_rxf_stats_v0 *port_stats =
421                                         &rxf_stats->port[adapter->port_num];
422         struct be_drv_stats *drvs = &adapter->drv_stats;
423
424         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
425         drvs->rx_pause_frames = port_stats->rx_pause_frames;
426         drvs->rx_crc_errors = port_stats->rx_crc_errors;
427         drvs->rx_control_frames = port_stats->rx_control_frames;
428         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
429         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
430         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
431         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
432         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
433         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
434         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
435         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
436         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
437         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
438         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
439         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
440         drvs->rx_dropped_header_too_small =
441                 port_stats->rx_dropped_header_too_small;
442         drvs->rx_address_filtered =
443                                         port_stats->rx_address_filtered +
444                                         port_stats->rx_vlan_filtered;
445         drvs->rx_alignment_symbol_errors =
446                 port_stats->rx_alignment_symbol_errors;
447
448         drvs->tx_pauseframes = port_stats->tx_pauseframes;
449         drvs->tx_controlframes = port_stats->tx_controlframes;
450
451         if (adapter->port_num)
452                 drvs->jabber_events = rxf_stats->port1_jabber_events;
453         else
454                 drvs->jabber_events = rxf_stats->port0_jabber_events;
455         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
456         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
457         drvs->forwarded_packets = rxf_stats->forwarded_packets;
458         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
459         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
460         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
461         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
462 }
463
464 static void populate_be_v1_stats(struct be_adapter *adapter)
465 {
466         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
467         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
468         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
469         struct be_port_rxf_stats_v1 *port_stats =
470                                         &rxf_stats->port[adapter->port_num];
471         struct be_drv_stats *drvs = &adapter->drv_stats;
472
473         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
474         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
475         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
476         drvs->rx_pause_frames = port_stats->rx_pause_frames;
477         drvs->rx_crc_errors = port_stats->rx_crc_errors;
478         drvs->rx_control_frames = port_stats->rx_control_frames;
479         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
480         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
481         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
482         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
483         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
484         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
485         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
486         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
487         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
488         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
489         drvs->rx_dropped_header_too_small =
490                 port_stats->rx_dropped_header_too_small;
491         drvs->rx_input_fifo_overflow_drop =
492                 port_stats->rx_input_fifo_overflow_drop;
493         drvs->rx_address_filtered = port_stats->rx_address_filtered;
494         drvs->rx_alignment_symbol_errors =
495                 port_stats->rx_alignment_symbol_errors;
496         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
497         drvs->tx_pauseframes = port_stats->tx_pauseframes;
498         drvs->tx_controlframes = port_stats->tx_controlframes;
499         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
500         drvs->jabber_events = port_stats->jabber_events;
501         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
502         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
503         drvs->forwarded_packets = rxf_stats->forwarded_packets;
504         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
505         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
506         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
507         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
508 }
509
510 static void populate_be_v2_stats(struct be_adapter *adapter)
511 {
512         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
513         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
514         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
515         struct be_port_rxf_stats_v2 *port_stats =
516                                         &rxf_stats->port[adapter->port_num];
517         struct be_drv_stats *drvs = &adapter->drv_stats;
518
519         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
520         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
521         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
522         drvs->rx_pause_frames = port_stats->rx_pause_frames;
523         drvs->rx_crc_errors = port_stats->rx_crc_errors;
524         drvs->rx_control_frames = port_stats->rx_control_frames;
525         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
526         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
527         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
528         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
529         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
530         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
531         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
532         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
533         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
534         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
535         drvs->rx_dropped_header_too_small =
536                 port_stats->rx_dropped_header_too_small;
537         drvs->rx_input_fifo_overflow_drop =
538                 port_stats->rx_input_fifo_overflow_drop;
539         drvs->rx_address_filtered = port_stats->rx_address_filtered;
540         drvs->rx_alignment_symbol_errors =
541                 port_stats->rx_alignment_symbol_errors;
542         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
543         drvs->tx_pauseframes = port_stats->tx_pauseframes;
544         drvs->tx_controlframes = port_stats->tx_controlframes;
545         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
546         drvs->jabber_events = port_stats->jabber_events;
547         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
548         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
549         drvs->forwarded_packets = rxf_stats->forwarded_packets;
550         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
551         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
552         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
553         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
554         if (be_roce_supported(adapter)) {
555                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
556                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
557                 drvs->rx_roce_frames = port_stats->roce_frames_received;
558                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
559                 drvs->roce_drops_payload_len =
560                         port_stats->roce_drops_payload_len;
561         }
562 }
563
564 static void populate_lancer_stats(struct be_adapter *adapter)
565 {
566         struct be_drv_stats *drvs = &adapter->drv_stats;
567         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
568
569         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
570         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
571         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
572         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
573         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
574         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
575         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
576         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
577         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
578         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
579         drvs->rx_dropped_tcp_length =
580                                 pport_stats->rx_dropped_invalid_tcp_length;
581         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
582         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
583         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
584         drvs->rx_dropped_header_too_small =
585                                 pport_stats->rx_dropped_header_too_small;
586         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
587         drvs->rx_address_filtered =
588                                         pport_stats->rx_address_filtered +
589                                         pport_stats->rx_vlan_filtered;
590         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
591         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
592         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
593         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
594         drvs->jabber_events = pport_stats->rx_jabbers;
595         drvs->forwarded_packets = pport_stats->num_forwards_lo;
596         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
597         drvs->rx_drops_too_many_frags =
598                                 pport_stats->rx_drops_too_many_frags_lo;
599 }
600
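/* Accumulate a 16-bit HW counter that can wrap around into a 32-bit driver
 * counter, adding 65536 whenever a wrap is detected.
 */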
601 static void accumulate_16bit_val(u32 *acc, u16 val)
602 {
603 #define lo(x)                   (x & 0xFFFF)
604 #define hi(x)                   (x & 0xFFFF0000)
605         bool wrapped = val < lo(*acc);
606         u32 newacc = hi(*acc) + val;
607
608         if (wrapped)
609                 newacc += 65536;
610         WRITE_ONCE(*acc, newacc);
611 }
612
613 static void populate_erx_stats(struct be_adapter *adapter,
614                                struct be_rx_obj *rxo, u32 erx_stat)
615 {
616         if (!BEx_chip(adapter))
617                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
618         else
619                 /* the erx HW counter below can actually wrap around after
620                  * 65535. The driver accumulates it into a 32-bit value
621                  */
622                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
623                                      (u16)erx_stat);
624 }
625
626 void be_parse_stats(struct be_adapter *adapter)
627 {
628         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
629         struct be_rx_obj *rxo;
630         int i;
631         u32 erx_stat;
632
633         if (lancer_chip(adapter)) {
634                 populate_lancer_stats(adapter);
635         } else {
636                 if (BE2_chip(adapter))
637                         populate_be_v0_stats(adapter);
638                 else if (BE3_chip(adapter))
639                         /* for BE3 */
640                         populate_be_v1_stats(adapter);
641                 else
642                         populate_be_v2_stats(adapter);
643
644                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
645                 for_all_rx_queues(adapter, rxo, i) {
646                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
647                         populate_erx_stats(adapter, rxo, erx_stat);
648                 }
649         }
650 }
651
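/* ndo_get_stats64 handler: aggregates per-RX/TX-queue packet and byte
 * counters (under u64_stats sync) and derives the rx error counters from
 * the driver stats populated by be_parse_stats().
 */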
652 static void be_get_stats64(struct net_device *netdev,
653                            struct rtnl_link_stats64 *stats)
654 {
655         struct be_adapter *adapter = netdev_priv(netdev);
656         struct be_drv_stats *drvs = &adapter->drv_stats;
657         struct be_rx_obj *rxo;
658         struct be_tx_obj *txo;
659         u64 pkts, bytes;
660         unsigned int start;
661         int i;
662
663         for_all_rx_queues(adapter, rxo, i) {
664                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
665
666                 do {
667                         start = u64_stats_fetch_begin(&rx_stats->sync);
668                         pkts = rx_stats(rxo)->rx_pkts;
669                         bytes = rx_stats(rxo)->rx_bytes;
670                 } while (u64_stats_fetch_retry(&rx_stats->sync, start));
671                 stats->rx_packets += pkts;
672                 stats->rx_bytes += bytes;
673                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
674                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
675                                         rx_stats(rxo)->rx_drops_no_frags;
676         }
677
678         for_all_tx_queues(adapter, txo, i) {
679                 const struct be_tx_stats *tx_stats = tx_stats(txo);
680
681                 do {
682                         start = u64_stats_fetch_begin(&tx_stats->sync);
683                         pkts = tx_stats(txo)->tx_pkts;
684                         bytes = tx_stats(txo)->tx_bytes;
685                 } while (u64_stats_fetch_retry(&tx_stats->sync, start));
686                 stats->tx_packets += pkts;
687                 stats->tx_bytes += bytes;
688         }
689
690         /* bad pkts received */
691         stats->rx_errors = drvs->rx_crc_errors +
692                 drvs->rx_alignment_symbol_errors +
693                 drvs->rx_in_range_errors +
694                 drvs->rx_out_range_errors +
695                 drvs->rx_frame_too_long +
696                 drvs->rx_dropped_too_small +
697                 drvs->rx_dropped_too_short +
698                 drvs->rx_dropped_header_too_small +
699                 drvs->rx_dropped_tcp_length +
700                 drvs->rx_dropped_runt;
701
702         /* detailed rx errors */
703         stats->rx_length_errors = drvs->rx_in_range_errors +
704                 drvs->rx_out_range_errors +
705                 drvs->rx_frame_too_long;
706
707         stats->rx_crc_errors = drvs->rx_crc_errors;
708
709         /* frame alignment errors */
710         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
711
712         /* receiver fifo overrun */
713         /* drops_no_pbuf is not per i/f, it's per BE card */
714         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
715                                 drvs->rx_input_fifo_overflow_drop +
716                                 drvs->rx_drops_no_pbuf;
717 }
718
719 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
720 {
721         struct net_device *netdev = adapter->netdev;
722
723         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
724                 netif_carrier_off(netdev);
725                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
726         }
727
728         if (link_status)
729                 netif_carrier_on(netdev);
730         else
731                 netif_carrier_off(netdev);
732
733         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
734 }
735
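/* Return the total header length (up to and including TCP) that gets
 * replicated in each segment of a GSO packet, using the inner headers for
 * encapsulated packets.
 */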
736 static int be_gso_hdr_len(struct sk_buff *skb)
737 {
738         if (skb->encapsulation)
739                 return skb_inner_tcp_all_headers(skb);
740
741         return skb_tcp_all_headers(skb);
742 }
743
744 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
745 {
746         struct be_tx_stats *stats = tx_stats(txo);
747         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
748         /* Account for headers which get duplicated in TSO pkt */
749         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
750
751         u64_stats_update_begin(&stats->sync);
752         stats->tx_reqs++;
753         stats->tx_bytes += skb->len + dup_hdr_len;
754         stats->tx_pkts += tx_pkts;
755         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
756                 stats->tx_vxlan_offload_pkts += tx_pkts;
757         u64_stats_update_end(&stats->sync);
758 }
759
760 /* Returns number of WRBs needed for the skb */
761 static u32 skb_wrb_cnt(struct sk_buff *skb)
762 {
763         /* +1 for the header wrb */
764         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
765 }
766
767 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
768 {
769         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
770         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
771         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
772         wrb->rsvd0 = 0;
773 }
774
775 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
776  * to avoid the swap and shift/mask operations in wrb_fill().
777  */
778 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
779 {
780         wrb->frag_pa_hi = 0;
781         wrb->frag_pa_lo = 0;
782         wrb->frag_len = 0;
783         wrb->rsvd0 = 0;
784 }
785
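/* Return the VLAN tag to use for a TX skb. If the priority requested by the
 * stack is not in the available priority bitmap, replace the priority bits
 * with the adapter's recommended priority.
 */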
786 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
787                                      struct sk_buff *skb)
788 {
789         u8 vlan_prio;
790         u16 vlan_tag;
791
792         vlan_tag = skb_vlan_tag_get(skb);
793         vlan_prio = skb_vlan_tag_get_prio(skb);
794         /* If vlan priority provided by OS is NOT in available bmap */
795         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
796                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
797                                 adapter->recommended_prio_bits;
798
799         return vlan_tag;
800 }
801
802 /* Used only for IP tunnel packets */
803 static u16 skb_inner_ip_proto(struct sk_buff *skb)
804 {
805         return (inner_ip_hdr(skb)->version == 4) ?
806                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
807 }
808
809 static u16 skb_ip_proto(struct sk_buff *skb)
810 {
811         return (ip_hdr(skb)->version == 4) ?
812                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
813 }
814
815 static inline bool be_is_txq_full(struct be_tx_obj *txo)
816 {
817         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
818 }
819
820 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
821 {
822         return atomic_read(&txo->q.used) < txo->q.len / 2;
823 }
824
825 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
826 {
827         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
828 }
829
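/* Translate skb offload state (GSO, checksum offload, VLAN tag) into the
 * WRB feature flags and fields used to build the TX header WRB.
 */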
830 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
831                                        struct sk_buff *skb,
832                                        struct be_wrb_params *wrb_params)
833 {
834         u16 proto;
835
836         if (skb_is_gso(skb)) {
837                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
838                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
839                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
840                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
841         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
842                 if (skb->encapsulation) {
843                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
844                         proto = skb_inner_ip_proto(skb);
845                 } else {
846                         proto = skb_ip_proto(skb);
847                 }
848                 if (proto == IPPROTO_TCP)
849                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
850                 else if (proto == IPPROTO_UDP)
851                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
852         }
853
854         if (skb_vlan_tag_present(skb)) {
855                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
856                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
857         }
858
859         BE_WRB_F_SET(wrb_params->features, CRC, 1);
860 }
861
862 static void wrb_fill_hdr(struct be_adapter *adapter,
863                          struct be_eth_hdr_wrb *hdr,
864                          struct be_wrb_params *wrb_params,
865                          struct sk_buff *skb)
866 {
867         memset(hdr, 0, sizeof(*hdr));
868
869         SET_TX_WRB_HDR_BITS(crc, hdr,
870                             BE_WRB_F_GET(wrb_params->features, CRC));
871         SET_TX_WRB_HDR_BITS(ipcs, hdr,
872                             BE_WRB_F_GET(wrb_params->features, IPCS));
873         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
874                             BE_WRB_F_GET(wrb_params->features, TCPCS));
875         SET_TX_WRB_HDR_BITS(udpcs, hdr,
876                             BE_WRB_F_GET(wrb_params->features, UDPCS));
877
878         SET_TX_WRB_HDR_BITS(lso, hdr,
879                             BE_WRB_F_GET(wrb_params->features, LSO));
880         SET_TX_WRB_HDR_BITS(lso6, hdr,
881                             BE_WRB_F_GET(wrb_params->features, LSO6));
882         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
883
884         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
885          * hack is not needed, the evt bit is set while ringing DB.
886          */
887         SET_TX_WRB_HDR_BITS(event, hdr,
888                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
889         SET_TX_WRB_HDR_BITS(vlan, hdr,
890                             BE_WRB_F_GET(wrb_params->features, VLAN));
891         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
892
893         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
894         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
895         SET_TX_WRB_HDR_BITS(mgmt, hdr,
896                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
897 }
898
899 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
900                           bool unmap_single)
901 {
902         dma_addr_t dma;
903         u32 frag_len = le32_to_cpu(wrb->frag_len);
904
905
906         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
907                 (u64)le32_to_cpu(wrb->frag_pa_lo);
908         if (frag_len) {
909                 if (unmap_single)
910                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
911                 else
912                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
913         }
914 }
915
916 /* Grab a WRB header for xmit */
917 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
918 {
919         u32 head = txo->q.head;
920
921         queue_head_inc(&txo->q);
922         return head;
923 }
924
925 /* Set up the WRB header for xmit */
926 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
927                                 struct be_tx_obj *txo,
928                                 struct be_wrb_params *wrb_params,
929                                 struct sk_buff *skb, u16 head)
930 {
931         u32 num_frags = skb_wrb_cnt(skb);
932         struct be_queue_info *txq = &txo->q;
933         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
934
935         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
936         be_dws_cpu_to_le(hdr, sizeof(*hdr));
937
938         BUG_ON(txo->sent_skb_list[head]);
939         txo->sent_skb_list[head] = skb;
940         txo->last_req_hdr = head;
941         atomic_add(num_frags, &txq->used);
942         txo->last_req_wrb_cnt = num_frags;
943         txo->pend_wrb_cnt += num_frags;
944 }
945
946 /* Setup a WRB fragment (buffer descriptor) for xmit */
947 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
948                                  int len)
949 {
950         struct be_eth_wrb *wrb;
951         struct be_queue_info *txq = &txo->q;
952
953         wrb = queue_head_node(txq);
954         wrb_fill(wrb, busaddr, len);
955         queue_head_inc(txq);
956 }
957
958 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
959  * was invoked. The producer index is restored to the previous packet and the
960  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
961  */
962 static void be_xmit_restore(struct be_adapter *adapter,
963                             struct be_tx_obj *txo, u32 head, bool map_single,
964                             u32 copied)
965 {
966         struct device *dev;
967         struct be_eth_wrb *wrb;
968         struct be_queue_info *txq = &txo->q;
969
970         dev = &adapter->pdev->dev;
971         txq->head = head;
972
973         /* skip the first wrb (hdr); it's not mapped */
974         queue_head_inc(txq);
975         while (copied) {
976                 wrb = queue_head_node(txq);
977                 unmap_tx_frag(dev, wrb, map_single);
978                 map_single = false;
979                 copied -= le32_to_cpu(wrb->frag_len);
980                 queue_head_inc(txq);
981         }
982
983         txq->head = head;
984 }
985
986 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
987  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
988  * of WRBs used up by the packet.
989  */
990 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
991                            struct sk_buff *skb,
992                            struct be_wrb_params *wrb_params)
993 {
994         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
995         struct device *dev = &adapter->pdev->dev;
996         bool map_single = false;
997         u32 head;
998         dma_addr_t busaddr;
999         int len;
1000
1001         head = be_tx_get_wrb_hdr(txo);
1002
1003         if (skb->len > skb->data_len) {
1004                 len = skb_headlen(skb);
1005
1006                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1007                 if (dma_mapping_error(dev, busaddr))
1008                         goto dma_err;
1009                 map_single = true;
1010                 be_tx_setup_wrb_frag(txo, busaddr, len);
1011                 copied += len;
1012         }
1013
1014         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1015                 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1016                 len = skb_frag_size(frag);
1017
1018                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1019                 if (dma_mapping_error(dev, busaddr))
1020                         goto dma_err;
1021                 be_tx_setup_wrb_frag(txo, busaddr, len);
1022                 copied += len;
1023         }
1024
1025         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1026
1027         be_tx_stats_update(txo, skb);
1028         return wrb_cnt;
1029
1030 dma_err:
1031         adapter->drv_stats.dma_map_errors++;
1032         be_xmit_restore(adapter, txo, head, map_single, copied);
1033         return 0;
1034 }
1035
1036 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1037 {
1038         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1039 }
1040
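/* Insert the VLAN tag (and the outer QnQ tag, if configured) into the packet
 * payload itself, so that HW VLAN insertion can be skipped. May return NULL
 * if the skb (re)allocation fails.
 */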
1041 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1042                                              struct sk_buff *skb,
1043                                              struct be_wrb_params
1044                                              *wrb_params)
1045 {
1046         bool insert_vlan = false;
1047         u16 vlan_tag = 0;
1048
1049         skb = skb_share_check(skb, GFP_ATOMIC);
1050         if (unlikely(!skb))
1051                 return skb;
1052
1053         if (skb_vlan_tag_present(skb)) {
1054                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1055                 insert_vlan = true;
1056         }
1057
1058         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1059                 if (!insert_vlan) {
1060                         vlan_tag = adapter->pvid;
1061                         insert_vlan = true;
1062                 }
1063                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
1064                  * skip VLAN insertion
1065                  */
1066                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1067         }
1068
1069         if (insert_vlan) {
1070                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1071                                                 vlan_tag);
1072                 if (unlikely(!skb))
1073                         return skb;
1074                 __vlan_hwaccel_clear_tag(skb);
1075         }
1076
1077         /* Insert the outer VLAN, if any */
1078         if (adapter->qnq_vid) {
1079                 vlan_tag = adapter->qnq_vid;
1080                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1081                                                 vlan_tag);
1082                 if (unlikely(!skb))
1083                         return skb;
1084                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1085         }
1086
1087         return skb;
1088 }
1089
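/* Detect IPv6 packets with a non-TCP/UDP next header whose extension header
 * length byte is 0xff; such packets require the TX stall workaround applied
 * by the callers.
 */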
1090 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1091 {
1092         struct ethhdr *eh = (struct ethhdr *)skb->data;
1093         u16 offset = ETH_HLEN;
1094
1095         if (eh->h_proto == htons(ETH_P_IPV6)) {
1096                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1097
1098                 offset += sizeof(struct ipv6hdr);
1099                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1100                     ip6h->nexthdr != NEXTHDR_UDP) {
1101                         struct ipv6_opt_hdr *ehdr =
1102                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1103
1104                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1105                         if (ehdr->hdrlen == 0xff)
1106                                 return true;
1107                 }
1108         }
1109         return false;
1110 }
1111
1112 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1113 {
1114         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1115 }
1116
1117 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1118 {
1119         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1120 }
1121
1122 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1123                                                   struct sk_buff *skb,
1124                                                   struct be_wrb_params
1125                                                   *wrb_params)
1126 {
1127         struct vlan_ethhdr *veh = skb_vlan_eth_hdr(skb);
1128         unsigned int eth_hdr_len;
1129         struct iphdr *ip;
1130
1131         /* For padded packets, BE HW modifies tot_len field in IP header
1132          * incorrectly when VLAN tag is inserted by HW.
1133          * For padded packets, Lancer computes incorrect checksum.
1134          */
1135         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1136                                                 VLAN_ETH_HLEN : ETH_HLEN;
1137         if (skb->len <= 60 &&
1138             (lancer_chip(adapter) || BE3_chip(adapter) ||
1139              skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) {
1140                 ip = (struct iphdr *)ip_hdr(skb);
1141                 if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len))))
1142                         goto tx_drop;
1143         }
1144
1145         /* If vlan tag is already inlined in the packet, skip HW VLAN
1146          * tagging in pvid-tagging mode
1147          */
1148         if (be_pvid_tagging_enabled(adapter) &&
1149             veh->h_vlan_proto == htons(ETH_P_8021Q))
1150                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152         /* HW has a bug wherein it will calculate CSUM for VLAN
1153          * pkts even though it is disabled.
1154          * Manually insert VLAN in pkt.
1155          */
1156         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157             skb_vlan_tag_present(skb)) {
1158                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159                 if (unlikely(!skb))
1160                         goto err;
1161         }
1162
1163         /* HW may lockup when VLAN HW tagging is requested on
1164          * certain ipv6 packets. Drop such pkts if the HW workaround to
1165          * skip HW tagging is not enabled by FW.
1166          */
1167         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168                      (adapter->pvid || adapter->qnq_vid) &&
1169                      !qnq_async_evt_rcvd(adapter)))
1170                 goto tx_drop;
1171
1172         /* Manual VLAN tag insertion to prevent:
1173          * ASIC lockup when the ASIC inserts VLAN tag into
1174          * certain ipv6 packets. Insert VLAN tags in driver,
1175          * and set event, completion, vlan bits accordingly
1176          * in the Tx WRB.
1177          */
1178         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179             be_vlan_tag_tx_chk(adapter, skb)) {
1180                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181                 if (unlikely(!skb))
1182                         goto err;
1183         }
1184
1185         return skb;
1186 tx_drop:
1187         dev_kfree_skb_any(skb);
1188 err:
1189         return NULL;
1190 }
1191
1192 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193                                            struct sk_buff *skb,
1194                                            struct be_wrb_params *wrb_params)
1195 {
1196         int err;
1197
1198         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1199          * packets that are 32 bytes or less may cause a transmit stall
1200          * on that port. The workaround is to pad such packets
1201          * (len <= 32 bytes) to a minimum length of 36 bytes.
1202          */
1203         if (skb->len <= 32) {
1204                 if (skb_put_padto(skb, 36))
1205                         return NULL;
1206         }
1207
1208         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210                 if (!skb)
1211                         return NULL;
1212         }
1213
1214         /* The stack can send us skbs with length greater than
1215          * what the HW can handle. Trim the extra bytes.
1216          */
1217         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219         WARN_ON(err);
1220
1221         return skb;
1222 }
1223
1224 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225 {
1226         struct be_queue_info *txq = &txo->q;
1227         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229         /* Mark the last request eventable if it hasn't been marked already */
1230         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1234         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235                 wrb_fill_dummy(queue_head_node(txq));
1236                 queue_head_inc(txq);
1237                 atomic_inc(&txq->used);
1238                 txo->pend_wrb_cnt++;
1239                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240                                            TX_HDR_WRB_NUM_SHIFT);
1241                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242                                           TX_HDR_WRB_NUM_SHIFT);
1243         }
1244         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245         txo->pend_wrb_cnt = 0;
1246 }
1247
1248 /* OS2BMC related */
1249
1250 #define DHCP_CLIENT_PORT        68
1251 #define DHCP_SERVER_PORT        67
1252 #define NET_BIOS_PORT1          137
1253 #define NET_BIOS_PORT2          138
1254 #define DHCPV6_RAS_PORT         547
1255
1256 #define is_mc_allowed_on_bmc(adapter, eh)       \
1257         (!is_multicast_filt_enabled(adapter) && \
1258          is_multicast_ether_addr(eh->h_dest) && \
1259          !is_broadcast_ether_addr(eh->h_dest))
1260
1261 #define is_bc_allowed_on_bmc(adapter, eh)       \
1262         (!is_broadcast_filt_enabled(adapter) && \
1263          is_broadcast_ether_addr(eh->h_dest))
1264
1265 #define is_arp_allowed_on_bmc(adapter, skb)     \
1266         (is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1269
1270 #define is_arp_filt_enabled(adapter)    \
1271                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1272
1273 #define is_dhcp_client_filt_enabled(adapter)    \
1274                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1275
1276 #define is_dhcp_srvr_filt_enabled(adapter)      \
1277                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1278
1279 #define is_nbios_filt_enabled(adapter)  \
1280                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1281
1282 #define is_ipv6_na_filt_enabled(adapter)        \
1283                 (adapter->bmc_filt_mask &       \
1284                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1285
1286 #define is_ipv6_ra_filt_enabled(adapter)        \
1287                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1288
1289 #define is_ipv6_ras_filt_enabled(adapter)       \
1290                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1291
1292 #define is_broadcast_filt_enabled(adapter)      \
1293                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1294
1295 #define is_multicast_filt_enabled(adapter)      \
1296                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1297
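/* Decide whether a copy of this TX packet must also be sent to the BMC
 * (OS2BMC), based on the per-adapter BMC filter mask and the packet type
 * (broadcast/multicast, ARP, ICMPv6 RA/NA, DHCP, NetBIOS, DHCPv6 RAS).
 * When it must, the VLAN tag is inserted inline before returning true.
 */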
1298 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1299                                struct sk_buff **skb)
1300 {
1301         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1302         bool os2bmc = false;
1303
1304         if (!be_is_os2bmc_enabled(adapter))
1305                 goto done;
1306
1307         if (!is_multicast_ether_addr(eh->h_dest))
1308                 goto done;
1309
1310         if (is_mc_allowed_on_bmc(adapter, eh) ||
1311             is_bc_allowed_on_bmc(adapter, eh) ||
1312             is_arp_allowed_on_bmc(adapter, (*skb))) {
1313                 os2bmc = true;
1314                 goto done;
1315         }
1316
1317         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1318                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1319                 u8 nexthdr = hdr->nexthdr;
1320
1321                 if (nexthdr == IPPROTO_ICMPV6) {
1322                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1323
1324                         switch (icmp6->icmp6_type) {
1325                         case NDISC_ROUTER_ADVERTISEMENT:
1326                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1327                                 goto done;
1328                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1329                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1330                                 goto done;
1331                         default:
1332                                 break;
1333                         }
1334                 }
1335         }
1336
1337         if (is_udp_pkt((*skb))) {
1338                 struct udphdr *udp = udp_hdr((*skb));
1339
1340                 switch (ntohs(udp->dest)) {
1341                 case DHCP_CLIENT_PORT:
1342                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1343                         goto done;
1344                 case DHCP_SERVER_PORT:
1345                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1346                         goto done;
1347                 case NET_BIOS_PORT1:
1348                 case NET_BIOS_PORT2:
1349                         os2bmc = is_nbios_filt_enabled(adapter);
1350                         goto done;
1351                 case DHCPV6_RAS_PORT:
1352                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1353                         goto done;
1354                 default:
1355                         break;
1356                 }
1357         }
1358 done:
1359         /* For packets over a vlan that are destined to the BMC,
1360          * the ASIC expects the vlan tag to be inline in the packet.
1361          */
1362         if (os2bmc)
1363                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1364
1365         return os2bmc;
1366 }
1367
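/* ndo_start_xmit handler: applies HW workarounds to the skb, maps it and
 * enqueues its WRBs on the selected TX queue, optionally enqueues a second
 * copy for the BMC, and rings the TX doorbell when a flush is needed.
 */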
1368 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1369 {
1370         struct be_adapter *adapter = netdev_priv(netdev);
1371         u16 q_idx = skb_get_queue_mapping(skb);
1372         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1373         struct be_wrb_params wrb_params = { 0 };
1374         bool flush = !netdev_xmit_more();
1375         u16 wrb_cnt;
1376
1377         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1378         if (unlikely(!skb))
1379                 goto drop;
1380
1381         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1382
1383         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384         if (unlikely(!wrb_cnt))
1385                 goto drop_skb;
1386
1387         /* If os2bmc is enabled and the pkt is destined to the BMC,
1388          * enqueue the pkt a 2nd time with the mgmt bit set.
1389          */
1390         if (be_send_pkt_to_bmc(adapter, &skb)) {
1391                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1392                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1393                 if (unlikely(!wrb_cnt))
1394                         goto drop_skb;
1395                 else
1396                         skb_get(skb);
1397         }
1398
1399         if (be_is_txq_full(txo)) {
1400                 netif_stop_subqueue(netdev, q_idx);
1401                 tx_stats(txo)->tx_stops++;
1402         }
1403
1404         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1405                 be_xmit_flush(adapter, txo);
1406
1407         return NETDEV_TX_OK;
1408 drop_skb:
1409         dev_kfree_skb_any(skb);
1410 drop:
1411         tx_stats(txo)->tx_drv_drops++;
1412         /* Flush the already enqueued tx requests */
1413         if (flush && txo->pend_wrb_cnt)
1414                 be_xmit_flush(adapter, txo);
1415
1416         return NETDEV_TX_OK;
1417 }
1418
1419 static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1420 {
1421         struct be_adapter *adapter = netdev_priv(netdev);
1422         struct device *dev = &adapter->pdev->dev;
1423         struct be_tx_obj *txo;
1424         struct sk_buff *skb;
1425         struct tcphdr *tcphdr;
1426         struct udphdr *udphdr;
1427         u32 *entry;
1428         int status;
1429         int i, j;
1430
1431         for_all_tx_queues(adapter, txo, i) {
1432                 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1433                          i, txo->q.head, txo->q.tail,
1434                          atomic_read(&txo->q.used), txo->q.id);
1435
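                     /* Each WRB is 4 dwords; dump any queue entry that is not all-zero */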
1436                 entry = txo->q.dma_mem.va;
1437                 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1438                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1439                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1440                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1441                                          j, entry[j], entry[j + 1],
1442                                          entry[j + 2], entry[j + 3]);
1443                         }
1444                 }
1445
1446                 entry = txo->cq.dma_mem.va;
1447                 dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1448                          i, txo->cq.head, txo->cq.tail,
1449                          atomic_read(&txo->cq.used));
1450                 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1451                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1452                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1453                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1454                                          j, entry[j], entry[j + 1],
1455                                          entry[j + 2], entry[j + 3]);
1456                         }
1457                 }
1458
1459                 for (j = 0; j < TX_Q_LEN; j++) {
1460                         if (txo->sent_skb_list[j]) {
1461                                 skb = txo->sent_skb_list[j];
1462                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1463                                         tcphdr = tcp_hdr(skb);
1464                                         dev_info(dev, "TCP source port %d\n",
1465                                                  ntohs(tcphdr->source));
1466                                         dev_info(dev, "TCP dest port %d\n",
1467                                                  ntohs(tcphdr->dest));
1468                                         dev_info(dev, "TCP sequence num %u\n",
1469                                                  ntohl(tcphdr->seq));
1470                                         dev_info(dev, "TCP ack_seq %u\n",
1471                                                  ntohl(tcphdr->ack_seq));
1472                                 } else if (ip_hdr(skb)->protocol ==
1473                                            IPPROTO_UDP) {
1474                                         udphdr = udp_hdr(skb);
1475                                         dev_info(dev, "UDP source port %d\n",
1476                                                  ntohs(udphdr->source));
1477                                         dev_info(dev, "UDP dest port %d\n",
1478                                                  ntohs(udphdr->dest));
1479                                 }
1480                                 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1481                                          j, skb, skb->len, skb->protocol);
1482                         }
1483                 }
1484         }
1485
1486         if (lancer_chip(adapter)) {
1487                 dev_info(dev, "Initiating reset due to tx timeout\n");
1488                 dev_info(dev, "Resetting adapter\n");
1489                 status = lancer_physdev_ctrl(adapter,
1490                                              PHYSDEV_CONTROL_FW_RESET_MASK);
1491                 if (status)
1492                         dev_err(dev, "Reset failed .. Reboot server\n");
1493         }
1494 }
1495
1496 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1497 {
1498         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1499                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1500 }
1501
1502 static int be_set_vlan_promisc(struct be_adapter *adapter)
1503 {
1504         struct device *dev = &adapter->pdev->dev;
1505         int status;
1506
1507         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1508                 return 0;
1509
1510         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1511         if (!status) {
1512                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1513                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1514         } else {
1515                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1516         }
1517         return status;
1518 }
1519
1520 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1521 {
1522         struct device *dev = &adapter->pdev->dev;
1523         int status;
1524
1525         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1526         if (!status) {
1527                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1528                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1529         }
1530         return status;
1531 }
1532
1533 /*
1534  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1535  * If the user configures more, place BE in vlan promiscuous mode.
1536  */
1537 static int be_vid_config(struct be_adapter *adapter)
1538 {
1539         struct device *dev = &adapter->pdev->dev;
1540         u16 vids[BE_NUM_VLANS_SUPPORTED];
1541         u16 num = 0, i = 0;
1542         int status = 0;
1543
1544         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1545         if (adapter->netdev->flags & IFF_PROMISC)
1546                 return 0;
1547
1548         if (adapter->vlans_added > be_max_vlans(adapter))
1549                 return be_set_vlan_promisc(adapter);
1550
1551         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1552                 status = be_clear_vlan_promisc(adapter);
1553                 if (status)
1554                         return status;
1555         }
1556         /* Construct VLAN Table to give to HW */
1557         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1558                 vids[num++] = cpu_to_le16(i);
1559
1560         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1561         if (status) {
1562                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1563                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1564                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1565                     addl_status(status) ==
1566                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1567                         return be_set_vlan_promisc(adapter);
1568         }
1569         return status;
1570 }
1571
1572 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1573 {
1574         struct be_adapter *adapter = netdev_priv(netdev);
1575         int status = 0;
1576
1577         mutex_lock(&adapter->rx_filter_lock);
1578
1579         /* Packets with VID 0 are always received by Lancer by default */
1580         if (lancer_chip(adapter) && vid == 0)
1581                 goto done;
1582
1583         if (test_bit(vid, adapter->vids))
1584                 goto done;
1585
1586         set_bit(vid, adapter->vids);
1587         adapter->vlans_added++;
1588
1589         status = be_vid_config(adapter);
1590 done:
1591         mutex_unlock(&adapter->rx_filter_lock);
1592         return status;
1593 }
1594
1595 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1596 {
1597         struct be_adapter *adapter = netdev_priv(netdev);
1598         int status = 0;
1599
1600         mutex_lock(&adapter->rx_filter_lock);
1601
1602         /* Packets with VID 0 are always received by Lancer by default */
1603         if (lancer_chip(adapter) && vid == 0)
1604                 goto done;
1605
1606         if (!test_bit(vid, adapter->vids))
1607                 goto done;
1608
1609         clear_bit(vid, adapter->vids);
1610         adapter->vlans_added--;
1611
1612         status = be_vid_config(adapter);
1613 done:
1614         mutex_unlock(&adapter->rx_filter_lock);
1615         return status;
1616 }
1617
1618 static void be_set_all_promisc(struct be_adapter *adapter)
1619 {
1620         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1621         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1622 }
1623
1624 static void be_set_mc_promisc(struct be_adapter *adapter)
1625 {
1626         int status;
1627
1628         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1629                 return;
1630
1631         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1632         if (!status)
1633                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1634 }
1635
1636 static void be_set_uc_promisc(struct be_adapter *adapter)
1637 {
1638         int status;
1639
1640         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1641                 return;
1642
1643         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1644         if (!status)
1645                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1646 }
1647
1648 static void be_clear_uc_promisc(struct be_adapter *adapter)
1649 {
1650         int status;
1651
1652         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1653                 return;
1654
1655         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1656         if (!status)
1657                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1658 }
1659
1660 /* The two functions below are the callback args for __dev_mc_sync()/__dev_uc_sync().
1661  * We use a single callback function for both sync and unsync. We don't really
1662  * add/remove addresses through this callback; we only use it to detect changes
1663  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1664  */
1665 static int be_uc_list_update(struct net_device *netdev,
1666                              const unsigned char *addr)
1667 {
1668         struct be_adapter *adapter = netdev_priv(netdev);
1669
1670         adapter->update_uc_list = true;
1671         return 0;
1672 }
1673
1674 static int be_mc_list_update(struct net_device *netdev,
1675                              const unsigned char *addr)
1676 {
1677         struct be_adapter *adapter = netdev_priv(netdev);
1678
1679         adapter->update_mc_list = true;
1680         return 0;
1681 }
1682
1683 static void be_set_mc_list(struct be_adapter *adapter)
1684 {
1685         struct net_device *netdev = adapter->netdev;
1686         struct netdev_hw_addr *ha;
1687         bool mc_promisc = false;
1688         int status;
1689
1690         netif_addr_lock_bh(netdev);
1691         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1692
1693         if (netdev->flags & IFF_PROMISC) {
1694                 adapter->update_mc_list = false;
1695         } else if (netdev->flags & IFF_ALLMULTI ||
1696                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1697                 /* Enable multicast promisc if num configured exceeds
1698                  * what we support
1699                  */
1700                 mc_promisc = true;
1701                 adapter->update_mc_list = false;
1702         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1703                 /* Update mc-list unconditionally if the iface was previously
1704                  * in mc-promisc mode and now is out of that mode.
1705                  */
1706                 adapter->update_mc_list = true;
1707         }
1708
1709         if (adapter->update_mc_list) {
1710                 int i = 0;
1711
1712                 /* cache the mc-list in adapter */
1713                 netdev_for_each_mc_addr(ha, netdev) {
1714                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1715                         i++;
1716                 }
1717                 adapter->mc_count = netdev_mc_count(netdev);
1718         }
1719         netif_addr_unlock_bh(netdev);
1720
1721         if (mc_promisc) {
1722                 be_set_mc_promisc(adapter);
1723         } else if (adapter->update_mc_list) {
1724                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1725                 if (!status)
1726                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1727                 else
1728                         be_set_mc_promisc(adapter);
1729
1730                 adapter->update_mc_list = false;
1731         }
1732 }
1733
1734 static void be_clear_mc_list(struct be_adapter *adapter)
1735 {
1736         struct net_device *netdev = adapter->netdev;
1737
1738         __dev_mc_unsync(netdev, NULL);
1739         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1740         adapter->mc_count = 0;
1741 }
1742
1743 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1744 {
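             /* The primary MAC is already programmed at pmac_id[0]; reuse that
              * pmac_id instead of consuming another MAC filter for the same address.
              */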
1745         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1746                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1747                 return 0;
1748         }
1749
1750         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1751                                adapter->if_handle,
1752                                &adapter->pmac_id[uc_idx + 1], 0);
1753 }
1754
1755 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1756 {
1757         if (pmac_id == adapter->pmac_id[0])
1758                 return;
1759
1760         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1761 }
1762
1763 static void be_set_uc_list(struct be_adapter *adapter)
1764 {
1765         struct net_device *netdev = adapter->netdev;
1766         struct netdev_hw_addr *ha;
1767         bool uc_promisc = false;
1768         int curr_uc_macs = 0, i;
1769
1770         netif_addr_lock_bh(netdev);
1771         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1772
1773         if (netdev->flags & IFF_PROMISC) {
1774                 adapter->update_uc_list = false;
1775         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
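                     /* one filter slot is accounted to the primary MAC (pmac_id[0]) */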
1776                 uc_promisc = true;
1777                 adapter->update_uc_list = false;
1778         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1779                 /* Update uc-list unconditionally if the iface was previously
1780                  * in uc-promisc mode and now is out of that mode.
1781                  */
1782                 adapter->update_uc_list = true;
1783         }
1784
1785         if (adapter->update_uc_list) {
1786                 /* cache the uc-list in adapter array */
1787                 i = 0;
1788                 netdev_for_each_uc_addr(ha, netdev) {
1789                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1790                         i++;
1791                 }
1792                 curr_uc_macs = netdev_uc_count(netdev);
1793         }
1794         netif_addr_unlock_bh(netdev);
1795
1796         if (uc_promisc) {
1797                 be_set_uc_promisc(adapter);
1798         } else if (adapter->update_uc_list) {
1799                 be_clear_uc_promisc(adapter);
1800
1801                 for (i = 0; i < adapter->uc_macs; i++)
1802                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1803
1804                 for (i = 0; i < curr_uc_macs; i++)
1805                         be_uc_mac_add(adapter, i);
1806                 adapter->uc_macs = curr_uc_macs;
1807                 adapter->update_uc_list = false;
1808         }
1809 }
1810
1811 static void be_clear_uc_list(struct be_adapter *adapter)
1812 {
1813         struct net_device *netdev = adapter->netdev;
1814         int i;
1815
1816         __dev_uc_unsync(netdev, NULL);
1817         for (i = 0; i < adapter->uc_macs; i++)
1818                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1819
1820         adapter->uc_macs = 0;
1821 }
1822
1823 static void __be_set_rx_mode(struct be_adapter *adapter)
1824 {
1825         struct net_device *netdev = adapter->netdev;
1826
1827         mutex_lock(&adapter->rx_filter_lock);
1828
1829         if (netdev->flags & IFF_PROMISC) {
1830                 if (!be_in_all_promisc(adapter))
1831                         be_set_all_promisc(adapter);
1832         } else if (be_in_all_promisc(adapter)) {
1833                 /* We need to re-program the vlan-list or clear
1834                  * vlan-promisc mode (if needed) when the interface
1835                  * comes out of promisc mode.
1836                  */
1837                 be_vid_config(adapter);
1838         }
1839
1840         be_set_uc_list(adapter);
1841         be_set_mc_list(adapter);
1842
1843         mutex_unlock(&adapter->rx_filter_lock);
1844 }
1845
1846 static void be_work_set_rx_mode(struct work_struct *work)
1847 {
1848         struct be_cmd_work *cmd_work =
1849                                 container_of(work, struct be_cmd_work, work);
1850
1851         __be_set_rx_mode(cmd_work->adapter);
1852         kfree(cmd_work);
1853 }
1854
1855 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1856 {
1857         struct be_adapter *adapter = netdev_priv(netdev);
1858         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1859         int status;
1860
1861         if (!sriov_enabled(adapter))
1862                 return -EPERM;
1863
1864         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1865                 return -EINVAL;
1866
1867         /* Proceed further only if the user-provided MAC is different
1868          * from the active MAC
1869          */
1870         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1871                 return 0;
1872
1873         if (BEx_chip(adapter)) {
1874                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1875                                 vf + 1);
1876
1877                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1878                                          &vf_cfg->pmac_id, vf + 1);
1879         } else {
1880                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1881                                         vf + 1);
1882         }
1883
1884         if (status) {
1885                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1886                         mac, vf, status);
1887                 return be_cmd_status(status);
1888         }
1889
1890         ether_addr_copy(vf_cfg->mac_addr, mac);
1891
1892         return 0;
1893 }
1894
1895 static int be_get_vf_config(struct net_device *netdev, int vf,
1896                             struct ifla_vf_info *vi)
1897 {
1898         struct be_adapter *adapter = netdev_priv(netdev);
1899         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900
1901         if (!sriov_enabled(adapter))
1902                 return -EPERM;
1903
1904         if (vf >= adapter->num_vfs)
1905                 return -EINVAL;
1906
1907         vi->vf = vf;
1908         vi->max_tx_rate = vf_cfg->tx_rate;
1909         vi->min_tx_rate = 0;
1910         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1911         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1912         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1913         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1914         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1915
1916         return 0;
1917 }
1918
1919 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1920 {
1921         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1922         u16 vids[BE_NUM_VLANS_SUPPORTED];
1923         int vf_if_id = vf_cfg->if_handle;
1924         int status;
1925
1926         /* Enable Transparent VLAN Tagging */
1927         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1928         if (status)
1929                 return status;
1930
1931         /* If TVT is enabled, clear any pre-programmed VLAN filters on the VF */
1932         vids[0] = 0;
1933         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1934         if (!status)
1935                 dev_info(&adapter->pdev->dev,
1936                          "Cleared guest VLANs on VF%d", vf);
1937
1938         /* After TVT is enabled, disallow VFs to program VLAN filters */
1939         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1940                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1941                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1942                 if (!status)
1943                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1944         }
1945         return 0;
1946 }
1947
1948 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1949 {
1950         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1951         struct device *dev = &adapter->pdev->dev;
1952         int status;
1953
1954         /* Reset Transparent VLAN Tagging. */
1955         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1956                                        vf_cfg->if_handle, 0, 0);
1957         if (status)
1958                 return status;
1959
1960         /* Allow VFs to program VLAN filtering */
1961         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1962                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1963                                                   BE_PRIV_FILTMGMT, vf + 1);
1964                 if (!status) {
1965                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1966                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1967                 }
1968         }
1969
1970         dev_info(dev,
1971                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1972         return 0;
1973 }
1974
1975 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1976                           __be16 vlan_proto)
1977 {
1978         struct be_adapter *adapter = netdev_priv(netdev);
1979         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1980         int status;
1981
1982         if (!sriov_enabled(adapter))
1983                 return -EPERM;
1984
1985         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1986                 return -EINVAL;
1987
1988         if (vlan_proto != htons(ETH_P_8021Q))
1989                 return -EPROTONOSUPPORT;
1990
1991         if (vlan || qos) {
1992                 vlan |= qos << VLAN_PRIO_SHIFT;
1993                 status = be_set_vf_tvt(adapter, vf, vlan);
1994         } else {
1995                 status = be_clear_vf_tvt(adapter, vf);
1996         }
1997
1998         if (status) {
1999                 dev_err(&adapter->pdev->dev,
2000                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2001                         status);
2002                 return be_cmd_status(status);
2003         }
2004
2005         vf_cfg->vlan_tag = vlan;
2006         return 0;
2007 }
2008
2009 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2010                              int min_tx_rate, int max_tx_rate)
2011 {
2012         struct be_adapter *adapter = netdev_priv(netdev);
2013         struct device *dev = &adapter->pdev->dev;
2014         int percent_rate, status = 0;
2015         u16 link_speed = 0;
2016         u8 link_status;
2017
2018         if (!sriov_enabled(adapter))
2019                 return -EPERM;
2020
2021         if (vf >= adapter->num_vfs)
2022                 return -EINVAL;
2023
2024         if (min_tx_rate)
2025                 return -EINVAL;
2026
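             /* A zero max_tx_rate needs no link-speed validation; program the
              * QoS setting directly.
              */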
2027         if (!max_tx_rate)
2028                 goto config_qos;
2029
2030         status = be_cmd_link_status_query(adapter, &link_speed,
2031                                           &link_status, 0);
2032         if (status)
2033                 goto err;
2034
2035         if (!link_status) {
2036                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2037                 status = -ENETDOWN;
2038                 goto err;
2039         }
2040
2041         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2042                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2043                         link_speed);
2044                 status = -EINVAL;
2045                 goto err;
2046         }
2047
2048         /* On Skyhawk the QOS setting must be done only as a % value */
2049         percent_rate = link_speed / 100;
2050         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2051                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2052                         percent_rate);
2053                 status = -EINVAL;
2054                 goto err;
2055         }
2056
2057 config_qos:
2058         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2059         if (status)
2060                 goto err;
2061
2062         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2063         return 0;
2064
2065 err:
2066         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2067                 max_tx_rate, vf);
2068         return be_cmd_status(status);
2069 }
2070
2071 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2072                                 int link_state)
2073 {
2074         struct be_adapter *adapter = netdev_priv(netdev);
2075         int status;
2076
2077         if (!sriov_enabled(adapter))
2078                 return -EPERM;
2079
2080         if (vf >= adapter->num_vfs)
2081                 return -EINVAL;
2082
2083         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2084         if (status) {
2085                 dev_err(&adapter->pdev->dev,
2086                         "Link state change on VF %d failed: %#x\n", vf, status);
2087                 return be_cmd_status(status);
2088         }
2089
2090         adapter->vf_cfg[vf].plink_tracking = link_state;
2091
2092         return 0;
2093 }
2094
2095 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2096 {
2097         struct be_adapter *adapter = netdev_priv(netdev);
2098         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2099         u8 spoofchk;
2100         int status;
2101
2102         if (!sriov_enabled(adapter))
2103                 return -EPERM;
2104
2105         if (vf >= adapter->num_vfs)
2106                 return -EINVAL;
2107
2108         if (BEx_chip(adapter))
2109                 return -EOPNOTSUPP;
2110
2111         if (enable == vf_cfg->spoofchk)
2112                 return 0;
2113
2114         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2115
2116         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2117                                        0, spoofchk);
2118         if (status) {
2119                 dev_err(&adapter->pdev->dev,
2120                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2121                 return be_cmd_status(status);
2122         }
2123
2124         vf_cfg->spoofchk = enable;
2125         return 0;
2126 }
2127
2128 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2129                           ulong now)
2130 {
2131         aic->rx_pkts_prev = rx_pkts;
2132         aic->tx_reqs_prev = tx_pkts;
2133         aic->jiffies = now;
2134 }
2135
2136 static int be_get_new_eqd(struct be_eq_obj *eqo)
2137 {
2138         struct be_adapter *adapter = eqo->adapter;
2139         int eqd, start;
2140         struct be_aic_obj *aic;
2141         struct be_rx_obj *rxo;
2142         struct be_tx_obj *txo;
2143         u64 rx_pkts = 0, tx_pkts = 0;
2144         ulong now;
2145         u32 pps, delta;
2146         int i;
2147
2148         aic = &adapter->aic_obj[eqo->idx];
2149         if (!adapter->aic_enabled) {
2150                 if (aic->jiffies)
2151                         aic->jiffies = 0;
2152                 eqd = aic->et_eqd;
2153                 return eqd;
2154         }
2155
2156         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2157                 do {
2158                         start = u64_stats_fetch_begin(&rxo->stats.sync);
2159                         rx_pkts += rxo->stats.rx_pkts;
2160                 } while (u64_stats_fetch_retry(&rxo->stats.sync, start));
2161         }
2162
2163         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2164                 do {
2165                         start = u64_stats_fetch_begin(&txo->stats.sync);
2166                         tx_pkts += txo->stats.tx_reqs;
2167                 } while (u64_stats_fetch_retry(&txo->stats.sync, start));
2168         }
2169
2170         /* Skip if the counters wrapped around or this is the first calculation */
2171         now = jiffies;
2172         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2173             rx_pkts < aic->rx_pkts_prev ||
2174             tx_pkts < aic->tx_reqs_prev) {
2175                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2176                 return aic->prev_eqd;
2177         }
2178
2179         delta = jiffies_to_msecs(now - aic->jiffies);
2180         if (delta == 0)
2181                 return aic->prev_eqd;
2182
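             /* Compute the aggregate RX+TX packet rate since the last sample
              * and derive the new delay from it; the result is clamped to the
              * min/max bounds below.
              */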
2183         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2184                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2185         eqd = (pps / 15000) << 2;
2186
2187         if (eqd < 8)
2188                 eqd = 0;
2189         eqd = min_t(u32, eqd, aic->max_eqd);
2190         eqd = max_t(u32, eqd, aic->min_eqd);
2191
2192         be_aic_update(aic, rx_pkts, tx_pkts, now);
2193
2194         return eqd;
2195 }
2196
2197 /* For Skyhawk-R only */
2198 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2199 {
2200         struct be_adapter *adapter = eqo->adapter;
2201         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2202         ulong now = jiffies;
2203         int eqd;
2204         u32 mult_enc;
2205
2206         if (!adapter->aic_enabled)
2207                 return 0;
2208
2209         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2210                 eqd = aic->prev_eqd;
2211         else
2212                 eqd = be_get_new_eqd(eqo);
2213
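             /* Bucket the computed delay into one of the four R2I delay encodings */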
2214         if (eqd > 100)
2215                 mult_enc = R2I_DLY_ENC_1;
2216         else if (eqd > 60)
2217                 mult_enc = R2I_DLY_ENC_2;
2218         else if (eqd > 20)
2219                 mult_enc = R2I_DLY_ENC_3;
2220         else
2221                 mult_enc = R2I_DLY_ENC_0;
2222
2223         aic->prev_eqd = eqd;
2224
2225         return mult_enc;
2226 }
2227
2228 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2229 {
2230         struct be_set_eqd set_eqd[MAX_EVT_QS];
2231         struct be_aic_obj *aic;
2232         struct be_eq_obj *eqo;
2233         int i, num = 0, eqd;
2234
2235         for_all_evt_queues(adapter, eqo, i) {
2236                 aic = &adapter->aic_obj[eqo->idx];
2237                 eqd = be_get_new_eqd(eqo);
2238                 if (force_update || eqd != aic->prev_eqd) {
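                             /* convert the delay to the multiplier format
                              * expected by be_cmd_modify_eqd()
                              */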
2239                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2240                         set_eqd[num].eq_id = eqo->q.id;
2241                         aic->prev_eqd = eqd;
2242                         num++;
2243                 }
2244         }
2245
2246         if (num)
2247                 be_cmd_modify_eqd(adapter, set_eqd, num);
2248 }
2249
2250 static void be_rx_stats_update(struct be_rx_obj *rxo,
2251                                struct be_rx_compl_info *rxcp)
2252 {
2253         struct be_rx_stats *stats = rx_stats(rxo);
2254
2255         u64_stats_update_begin(&stats->sync);
2256         stats->rx_compl++;
2257         stats->rx_bytes += rxcp->pkt_size;
2258         stats->rx_pkts++;
2259         if (rxcp->tunneled)
2260                 stats->rx_vxlan_offload_pkts++;
2261         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2262                 stats->rx_mcast_pkts++;
2263         if (rxcp->err)
2264                 stats->rx_compl_err++;
2265         u64_stats_update_end(&stats->sync);
2266 }
2267
2268 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2269 {
2270         /* L4 checksum is not reliable for non-TCP/UDP packets.
2271          * Also ignore ipcksm for IPv6 pkts
2272          */
2273         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2274                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2275 }
2276
2277 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2278 {
2279         struct be_adapter *adapter = rxo->adapter;
2280         struct be_rx_page_info *rx_page_info;
2281         struct be_queue_info *rxq = &rxo->q;
2282         u32 frag_idx = rxq->tail;
2283
2284         rx_page_info = &rxo->page_info_tbl[frag_idx];
2285         BUG_ON(!rx_page_info->page);
2286
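             /* Only the last frag of a large page holds the DMA mapping for the
              * whole page; intermediate frags are just synced for CPU access.
              */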
2287         if (rx_page_info->last_frag) {
2288                 dma_unmap_page(&adapter->pdev->dev,
2289                                dma_unmap_addr(rx_page_info, bus),
2290                                adapter->big_page_size, DMA_FROM_DEVICE);
2291                 rx_page_info->last_frag = false;
2292         } else {
2293                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2294                                         dma_unmap_addr(rx_page_info, bus),
2295                                         rx_frag_size, DMA_FROM_DEVICE);
2296         }
2297
2298         queue_tail_inc(rxq);
2299         atomic_dec(&rxq->used);
2300         return rx_page_info;
2301 }
2302
2303 /* Throw away the data in the Rx completion */
2304 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2305                                 struct be_rx_compl_info *rxcp)
2306 {
2307         struct be_rx_page_info *page_info;
2308         u16 i, num_rcvd = rxcp->num_rcvd;
2309
2310         for (i = 0; i < num_rcvd; i++) {
2311                 page_info = get_rx_page_info(rxo);
2312                 put_page(page_info->page);
2313                 memset(page_info, 0, sizeof(*page_info));
2314         }
2315 }
2316
2317 /*
2318  * skb_fill_rx_data forms a complete skb for an ether frame
2319  * indicated by rxcp.
2320  */
2321 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2322                              struct be_rx_compl_info *rxcp)
2323 {
2324         struct be_rx_page_info *page_info;
2325         u16 i, j;
2326         u16 hdr_len, curr_frag_len, remaining;
2327         u8 *start;
2328
2329         page_info = get_rx_page_info(rxo);
2330         start = page_address(page_info->page) + page_info->page_offset;
2331         prefetch(start);
2332
2333         /* Copy data in the first descriptor of this completion */
2334         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2335
2336         skb->len = curr_frag_len;
2337         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2338                 memcpy(skb->data, start, curr_frag_len);
2339                 /* Complete packet has now been moved to data */
2340                 put_page(page_info->page);
2341                 skb->data_len = 0;
2342                 skb->tail += curr_frag_len;
2343         } else {
2344                 hdr_len = ETH_HLEN;
2345                 memcpy(skb->data, start, hdr_len);
2346                 skb_shinfo(skb)->nr_frags = 1;
2347                 skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[0],
2348                                         page_info->page,
2349                                         page_info->page_offset + hdr_len,
2350                                         curr_frag_len - hdr_len);
2351                 skb->data_len = curr_frag_len - hdr_len;
2352                 skb->truesize += rx_frag_size;
2353                 skb->tail += hdr_len;
2354         }
2355         page_info->page = NULL;
2356
2357         if (rxcp->pkt_size <= rx_frag_size) {
2358                 BUG_ON(rxcp->num_rcvd != 1);
2359                 return;
2360         }
2361
2362         /* More frags present for this completion */
2363         remaining = rxcp->pkt_size - curr_frag_len;
2364         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2365                 page_info = get_rx_page_info(rxo);
2366                 curr_frag_len = min(remaining, rx_frag_size);
2367
2368                 /* Coalesce all frags from the same physical page in one slot */
2369                 if (page_info->page_offset == 0) {
2370                         /* Fresh page */
2371                         j++;
2372                         skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
2373                                                 page_info->page,
2374                                                 page_info->page_offset,
2375                                                 curr_frag_len);
2376                         skb_shinfo(skb)->nr_frags++;
2377                 } else {
2378                         put_page(page_info->page);
2379                         skb_frag_size_add(&skb_shinfo(skb)->frags[j],
2380                                           curr_frag_len);
2381                 }
2382
2383                 skb->len += curr_frag_len;
2384                 skb->data_len += curr_frag_len;
2385                 skb->truesize += rx_frag_size;
2386                 remaining -= curr_frag_len;
2387                 page_info->page = NULL;
2388         }
2389         BUG_ON(j > MAX_SKB_FRAGS);
2390 }
2391
2392 /* Process the RX completion indicated by rxcp when GRO is disabled */
2393 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2394                                 struct be_rx_compl_info *rxcp)
2395 {
2396         struct be_adapter *adapter = rxo->adapter;
2397         struct net_device *netdev = adapter->netdev;
2398         struct sk_buff *skb;
2399
2400         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2401         if (unlikely(!skb)) {
2402                 rx_stats(rxo)->rx_drops_no_skbs++;
2403                 be_rx_compl_discard(rxo, rxcp);
2404                 return;
2405         }
2406
2407         skb_fill_rx_data(rxo, skb, rxcp);
2408
2409         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2410                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2411         else
2412                 skb_checksum_none_assert(skb);
2413
2414         skb->protocol = eth_type_trans(skb, netdev);
2415         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2416         if (netdev->features & NETIF_F_RXHASH)
2417                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2418
2419         skb->csum_level = rxcp->tunneled;
2420         skb_mark_napi_id(skb, napi);
2421
2422         if (rxcp->vlanf)
2423                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2424
2425         netif_receive_skb(skb);
2426 }
2427
2428 /* Process the RX completion indicated by rxcp when GRO is enabled */
2429 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2430                                     struct napi_struct *napi,
2431                                     struct be_rx_compl_info *rxcp)
2432 {
2433         struct be_adapter *adapter = rxo->adapter;
2434         struct be_rx_page_info *page_info;
2435         struct sk_buff *skb = NULL;
2436         u16 remaining, curr_frag_len;
2437         u16 i, j;
2438
2439         skb = napi_get_frags(napi);
2440         if (!skb) {
2441                 be_rx_compl_discard(rxo, rxcp);
2442                 return;
2443         }
2444
2445         remaining = rxcp->pkt_size;
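             /* j indexes the skb frags; it is bumped to 0 on the first iteration */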
2446         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2447                 page_info = get_rx_page_info(rxo);
2448
2449                 curr_frag_len = min(remaining, rx_frag_size);
2450
2451                 /* Coalesce all frags from the same physical page in one slot */
2452                 if (i == 0 || page_info->page_offset == 0) {
2453                         /* First frag or Fresh page */
2454                         j++;
2455                         skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
2456                                                 page_info->page,
2457                                                 page_info->page_offset,
2458                                                 curr_frag_len);
2459                 } else {
2460                         put_page(page_info->page);
2461                         skb_frag_size_add(&skb_shinfo(skb)->frags[j],
2462                                           curr_frag_len);
2463                 }
2464
2465                 skb->truesize += rx_frag_size;
2466                 remaining -= curr_frag_len;
2467                 memset(page_info, 0, sizeof(*page_info));
2468         }
2469         BUG_ON(j > MAX_SKB_FRAGS);
2470
2471         skb_shinfo(skb)->nr_frags = j + 1;
2472         skb->len = rxcp->pkt_size;
2473         skb->data_len = rxcp->pkt_size;
2474         skb->ip_summed = CHECKSUM_UNNECESSARY;
2475         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2476         if (adapter->netdev->features & NETIF_F_RXHASH)
2477                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2478
2479         skb->csum_level = rxcp->tunneled;
2480
2481         if (rxcp->vlanf)
2482                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2483
2484         napi_gro_frags(napi);
2485 }
2486
2487 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2488                                  struct be_rx_compl_info *rxcp)
2489 {
2490         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2491         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2492         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2493         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2494         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2495         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2496         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2497         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2498         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2499         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2500         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2501         if (rxcp->vlanf) {
2502                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2503                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2504         }
2505         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2506         rxcp->tunneled =
2507                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2508 }
2509
2510 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2511                                  struct be_rx_compl_info *rxcp)
2512 {
2513         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2514         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2515         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2516         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2517         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2518         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2519         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2520         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2521         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2522         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2523         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2524         if (rxcp->vlanf) {
2525                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2526                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2527         }
2528         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2529         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2530 }
2531
2532 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2533 {
2534         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2535         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2536         struct be_adapter *adapter = rxo->adapter;
2537
2538         /* For checking the valid bit it is OK to use either definition, as the
2539          * valid bit is at the same position in both v0 and v1 Rx compl */
2540         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2541                 return NULL;
2542
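             /* Ensure the valid bit is read before the rest of the completion */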
2543         rmb();
2544         be_dws_le_to_cpu(compl, sizeof(*compl));
2545
2546         if (adapter->be3_native)
2547                 be_parse_rx_compl_v1(compl, rxcp);
2548         else
2549                 be_parse_rx_compl_v0(compl, rxcp);
2550
2551         if (rxcp->ip_frag)
2552                 rxcp->l4_csum = 0;
2553
2554         if (rxcp->vlanf) {
2555                 /* In QnQ mode, if the qnq bit is not set, the packet was
2556                  * tagged only with the transparent outer vlan-tag and must
2557                  * not be treated as a vlan packet by the host
2558                  */
2559                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2560                         rxcp->vlanf = 0;
2561
2562                 if (!lancer_chip(adapter))
2563                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2564
2565                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2566                     !test_bit(rxcp->vlan_tag, adapter->vids))
2567                         rxcp->vlanf = 0;
2568         }
2569
2570         /* As the compl has been parsed, reset it; we won't touch it again */
2571         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2572
2573         queue_tail_inc(&rxo->cq);
2574         return rxcp;
2575 }
2576
2577 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2578 {
2579         u32 order = get_order(size);
2580
2581         if (order > 0)
2582                 gfp |= __GFP_COMP;
2583         return  alloc_pages(gfp, order);
2584 }
2585
2586 /*
2587  * Allocate a page, split it to fragments of size rx_frag_size and post as
2588  * receive buffers to BE
2589  */
2590 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2591 {
2592         struct be_adapter *adapter = rxo->adapter;
2593         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2594         struct be_queue_info *rxq = &rxo->q;
2595         struct page *pagep = NULL;
2596         struct device *dev = &adapter->pdev->dev;
2597         struct be_eth_rx_d *rxd;
2598         u64 page_dmaaddr = 0, frag_dmaaddr;
2599         u32 posted, page_offset = 0, notify = 0;
2600
2601         page_info = &rxo->page_info_tbl[rxq->head];
2602         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2603                 if (!pagep) {
2604                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2605                         if (unlikely(!pagep)) {
2606                                 rx_stats(rxo)->rx_post_fail++;
2607                                 break;
2608                         }
2609                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2610                                                     adapter->big_page_size,
2611                                                     DMA_FROM_DEVICE);
2612                         if (dma_mapping_error(dev, page_dmaaddr)) {
2613                                 put_page(pagep);
2614                                 pagep = NULL;
2615                                 adapter->drv_stats.dma_map_errors++;
2616                                 break;
2617                         }
2618                         page_offset = 0;
2619                 } else {
2620                         get_page(pagep);
2621                         page_offset += rx_frag_size;
2622                 }
2623                 page_info->page_offset = page_offset;
2624                 page_info->page = pagep;
2625
2626                 rxd = queue_head_node(rxq);
2627                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2628                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2629                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2630
2631                 /* Any space left in the current big page for another frag? */
2632                 if ((page_offset + rx_frag_size + rx_frag_size) >
2633                                         adapter->big_page_size) {
2634                         pagep = NULL;
2635                         page_info->last_frag = true;
2636                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2637                 } else {
2638                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2639                 }
2640
2641                 prev_page_info = page_info;
2642                 queue_head_inc(rxq);
2643                 page_info = &rxo->page_info_tbl[rxq->head];
2644         }
2645
2646         /* Mark the last frag of a page when we break out of the above loop
2647          * with no more slots available in the RXQ
2648          */
2649         if (pagep) {
2650                 prev_page_info->last_frag = true;
2651                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2652         }
2653
2654         if (posted) {
2655                 atomic_add(posted, &rxq->used);
2656                 if (rxo->rx_post_starved)
2657                         rxo->rx_post_starved = false;
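                     /* Each doorbell write can post at most MAX_NUM_POST_ERX_DB buffers */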
2658                 do {
2659                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2660                         be_rxq_notify(adapter, rxq->id, notify);
2661                         posted -= notify;
2662                 } while (posted);
2663         } else if (atomic_read(&rxq->used) == 0) {
2664                 /* Let be_worker replenish when memory is available */
2665                 rxo->rx_post_starved = true;
2666         }
2667 }
2668
2669 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2670 {
2671         switch (status) {
2672         case BE_TX_COMP_HDR_PARSE_ERR:
2673                 tx_stats(txo)->tx_hdr_parse_err++;
2674                 break;
2675         case BE_TX_COMP_NDMA_ERR:
2676                 tx_stats(txo)->tx_dma_err++;
2677                 break;
2678         case BE_TX_COMP_ACL_ERR:
2679                 tx_stats(txo)->tx_spoof_check_err++;
2680                 break;
2681         }
2682 }
2683
2684 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2685 {
2686         switch (status) {
2687         case LANCER_TX_COMP_LSO_ERR:
2688                 tx_stats(txo)->tx_tso_err++;
2689                 break;
2690         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2691         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2692                 tx_stats(txo)->tx_spoof_check_err++;
2693                 break;
2694         case LANCER_TX_COMP_QINQ_ERR:
2695                 tx_stats(txo)->tx_qinq_err++;
2696                 break;
2697         case LANCER_TX_COMP_PARITY_ERR:
2698                 tx_stats(txo)->tx_internal_parity_err++;
2699                 break;
2700         case LANCER_TX_COMP_DMA_ERR:
2701                 tx_stats(txo)->tx_dma_err++;
2702                 break;
2703         case LANCER_TX_COMP_SGE_ERR:
2704                 tx_stats(txo)->tx_sge_err++;
2705                 break;
2706         }
2707 }
2708
2709 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2710                                                 struct be_tx_obj *txo)
2711 {
2712         struct be_queue_info *tx_cq = &txo->cq;
2713         struct be_tx_compl_info *txcp = &txo->txcp;
2714         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2715
2716         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2717                 return NULL;
2718
2719         /* Ensure load ordering of valid bit dword and other dwords below */
2720         rmb();
2721         be_dws_le_to_cpu(compl, sizeof(*compl));
2722
2723         txcp->status = GET_TX_COMPL_BITS(status, compl);
2724         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2725
2726         if (txcp->status) {
2727                 if (lancer_chip(adapter)) {
2728                         lancer_update_tx_err(txo, txcp->status);
2729                         /* Reset the adapter in case of TSO,
2730                          * SGE or parity errors
2731                          */
2732                         if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2733                             txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2734                             txcp->status == LANCER_TX_COMP_SGE_ERR)
2735                                 be_set_error(adapter, BE_ERROR_TX);
2736                 } else {
2737                         be_update_tx_err(txo, txcp->status);
2738                 }
2739         }
2740
2741         if (be_check_error(adapter, BE_ERROR_TX))
2742                 return NULL;
2743
2744         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2745         queue_tail_inc(tx_cq);
2746         return txcp;
2747 }
2748
2749 static u16 be_tx_compl_process(struct be_adapter *adapter,
2750                                struct be_tx_obj *txo, u16 last_index)
2751 {
2752         struct sk_buff **sent_skbs = txo->sent_skb_list;
2753         struct be_queue_info *txq = &txo->q;
2754         struct sk_buff *skb = NULL;
2755         bool unmap_skb_hdr = false;
2756         struct be_eth_wrb *wrb;
2757         u16 num_wrbs = 0;
2758         u32 frag_index;
2759
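             /* Walk the TXQ from tail to last_index: the skb is recorded at its
              * hdr WRB slot; unmap each frag WRB and free the completed skb(s).
              */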
2760         do {
2761                 if (sent_skbs[txq->tail]) {
2762                         /* Free skb from prev req */
2763                         if (skb)
2764                                 dev_consume_skb_any(skb);
2765                         skb = sent_skbs[txq->tail];
2766                         sent_skbs[txq->tail] = NULL;
2767                         queue_tail_inc(txq);  /* skip hdr wrb */
2768                         num_wrbs++;
2769                         unmap_skb_hdr = true;
2770                 }
2771                 wrb = queue_tail_node(txq);
2772                 frag_index = txq->tail;
2773                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2774                               (unmap_skb_hdr && skb_headlen(skb)));
2775                 unmap_skb_hdr = false;
2776                 queue_tail_inc(txq);
2777                 num_wrbs++;
2778         } while (frag_index != last_index);
2779         dev_consume_skb_any(skb);
2780
2781         return num_wrbs;
2782 }
2783
2784 /* Return the number of events in the event queue */
2785 static inline int events_get(struct be_eq_obj *eqo)
2786 {
2787         struct be_eq_entry *eqe;
2788         int num = 0;
2789
2790         do {
2791                 eqe = queue_tail_node(&eqo->q);
2792                 if (eqe->evt == 0)
2793                         break;
2794
2795                 rmb();
2796                 eqe->evt = 0;
2797                 num++;
2798                 queue_tail_inc(&eqo->q);
2799         } while (true);
2800
2801         return num;
2802 }
2803
2804 /* Leaves the EQ in disarmed state */
2805 static void be_eq_clean(struct be_eq_obj *eqo)
2806 {
2807         int num = events_get(eqo);
2808
2809         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2810 }
2811
2812 /* Free posted rx buffers that were not used */
2813 static void be_rxq_clean(struct be_rx_obj *rxo)
2814 {
2815         struct be_queue_info *rxq = &rxo->q;
2816         struct be_rx_page_info *page_info;
2817
2818         while (atomic_read(&rxq->used) > 0) {
2819                 page_info = get_rx_page_info(rxo);
2820                 put_page(page_info->page);
2821                 memset(page_info, 0, sizeof(*page_info));
2822         }
2823         BUG_ON(atomic_read(&rxq->used));
2824         rxq->tail = 0;
2825         rxq->head = 0;
2826 }
2827
2828 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2829 {
2830         struct be_queue_info *rx_cq = &rxo->cq;
2831         struct be_rx_compl_info *rxcp;
2832         struct be_adapter *adapter = rxo->adapter;
2833         int flush_wait = 0;
2834
2835         /* Consume pending rx completions.
2836          * Wait for the flush completion (identified by zero num_rcvd)
2837          * to arrive. Notify CQ even when there are no more CQ entries
2838          * for HW to flush partially coalesced CQ entries.
2839          * In Lancer, there is no need to wait for flush compl.
2840          */
2841         for (;;) {
2842                 rxcp = be_rx_compl_get(rxo);
2843                 if (!rxcp) {
2844                         if (lancer_chip(adapter))
2845                                 break;
2846
2847                         if (flush_wait++ > 50 ||
2848                             be_check_error(adapter,
2849                                            BE_ERROR_HW)) {
2850                                 dev_warn(&adapter->pdev->dev,
2851                                          "did not receive flush compl\n");
2852                                 break;
2853                         }
2854                         be_cq_notify(adapter, rx_cq->id, true, 0);
2855                         mdelay(1);
2856                 } else {
2857                         be_rx_compl_discard(rxo, rxcp);
2858                         be_cq_notify(adapter, rx_cq->id, false, 1);
2859                         if (rxcp->num_rcvd == 0)
2860                                 break;
2861                 }
2862         }
2863
2864         /* After cleanup, leave the CQ in unarmed state */
2865         be_cq_notify(adapter, rx_cq->id, false, 0);
2866 }
2867
2868 static void be_tx_compl_clean(struct be_adapter *adapter)
2869 {
2870         struct device *dev = &adapter->pdev->dev;
2871         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2872         struct be_tx_compl_info *txcp;
2873         struct be_queue_info *txq;
2874         u32 end_idx, notified_idx;
2875         struct be_tx_obj *txo;
2876         int i, pending_txqs;
2877
2878         /* Stop polling for compls when HW has been silent for 10ms */
2879         do {
2880                 pending_txqs = adapter->num_tx_qs;
2881
2882                 for_all_tx_queues(adapter, txo, i) {
2883                         cmpl = 0;
2884                         num_wrbs = 0;
2885                         txq = &txo->q;
2886                         while ((txcp = be_tx_compl_get(adapter, txo))) {
2887                                 num_wrbs +=
2888                                         be_tx_compl_process(adapter, txo,
2889                                                             txcp->end_index);
2890                                 cmpl++;
2891                         }
2892                         if (cmpl) {
2893                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2894                                 atomic_sub(num_wrbs, &txq->used);
2895                                 timeo = 0;
2896                         }
2897                         if (!be_is_tx_compl_pending(txo))
2898                                 pending_txqs--;
2899                 }
2900
2901                 if (pending_txqs == 0 || ++timeo > 10 ||
2902                     be_check_error(adapter, BE_ERROR_HW))
2903                         break;
2904
2905                 mdelay(1);
2906         } while (true);
2907
2908         /* Free enqueued TX that was never notified to HW */
2909         for_all_tx_queues(adapter, txo, i) {
2910                 txq = &txo->q;
2911
2912                 if (atomic_read(&txq->used)) {
2913                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2914                                  i, atomic_read(&txq->used));
2915                         notified_idx = txq->tail;
2916                         end_idx = txq->tail;
2917                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2918                                   txq->len);
2919                         /* Use the tx-compl process logic to handle requests
2920                          * that were not sent to the HW.
2921                          */
2922                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2923                         atomic_sub(num_wrbs, &txq->used);
2924                         BUG_ON(atomic_read(&txq->used));
2925                         txo->pend_wrb_cnt = 0;
2926                         /* Since hw was never notified of these requests,
2927                          * reset TXQ indices
2928                          */
2929                         txq->head = notified_idx;
2930                         txq->tail = notified_idx;
2931                 }
2932         }
2933 }
2934
2935 static void be_evt_queues_destroy(struct be_adapter *adapter)
2936 {
2937         struct be_eq_obj *eqo;
2938         int i;
2939
2940         for_all_evt_queues(adapter, eqo, i) {
2941                 if (eqo->q.created) {
2942                         be_eq_clean(eqo);
2943                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2944                         netif_napi_del(&eqo->napi);
2945                         free_cpumask_var(eqo->affinity_mask);
2946                 }
2947                 be_queue_free(adapter, &eqo->q);
2948         }
2949 }
2950
2951 static int be_evt_queues_create(struct be_adapter *adapter)
2952 {
2953         struct be_queue_info *eq;
2954         struct be_eq_obj *eqo;
2955         struct be_aic_obj *aic;
2956         int i, rc;
2957
2958         /* need enough EQs to service both RX and TX queues */
2959         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2960                                     max(adapter->cfg_num_rx_irqs,
2961                                         adapter->cfg_num_tx_irqs));
2962
2963         adapter->aic_enabled = true;
2964
2965         for_all_evt_queues(adapter, eqo, i) {
2966                 int numa_node = dev_to_node(&adapter->pdev->dev);
2967
2968                 aic = &adapter->aic_obj[i];
2969                 eqo->adapter = adapter;
2970                 eqo->idx = i;
2971                 aic->max_eqd = BE_MAX_EQD;
2972
2973                 eq = &eqo->q;
2974                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2975                                     sizeof(struct be_eq_entry));
2976                 if (rc)
2977                         return rc;
2978
2979                 rc = be_cmd_eq_create(adapter, eqo);
2980                 if (rc)
2981                         return rc;
2982
2983                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2984                         return -ENOMEM;
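                /* Spread EQ affinity hints across CPUs, preferring those
                 * local to the adapter's NUMA node.
                 */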
2985                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2986                                 eqo->affinity_mask);
2987                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll);
2988         }
2989         return 0;
2990 }
2991
2992 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2993 {
2994         struct be_queue_info *q;
2995
2996         q = &adapter->mcc_obj.q;
2997         if (q->created)
2998                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2999         be_queue_free(adapter, q);
3000
3001         q = &adapter->mcc_obj.cq;
3002         if (q->created)
3003                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3004         be_queue_free(adapter, q);
3005 }
3006
3007 /* Must be called only after TX qs are created as MCC shares TX EQ */
3008 static int be_mcc_queues_create(struct be_adapter *adapter)
3009 {
3010         struct be_queue_info *q, *cq;
3011
3012         cq = &adapter->mcc_obj.cq;
3013         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3014                            sizeof(struct be_mcc_compl)))
3015                 goto err;
3016
3017         /* Use the default EQ for MCC completions */
3018         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3019                 goto mcc_cq_free;
3020
3021         q = &adapter->mcc_obj.q;
3022         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3023                 goto mcc_cq_destroy;
3024
3025         if (be_cmd_mccq_create(adapter, q, cq))
3026                 goto mcc_q_free;
3027
3028         return 0;
3029
3030 mcc_q_free:
3031         be_queue_free(adapter, q);
3032 mcc_cq_destroy:
3033         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3034 mcc_cq_free:
3035         be_queue_free(adapter, cq);
3036 err:
3037         return -1;
3038 }
3039
3040 static void be_tx_queues_destroy(struct be_adapter *adapter)
3041 {
3042         struct be_queue_info *q;
3043         struct be_tx_obj *txo;
3044         u8 i;
3045
3046         for_all_tx_queues(adapter, txo, i) {
3047                 q = &txo->q;
3048                 if (q->created)
3049                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3050                 be_queue_free(adapter, q);
3051
3052                 q = &txo->cq;
3053                 if (q->created)
3054                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3055                 be_queue_free(adapter, q);
3056         }
3057 }
3058
3059 static int be_tx_qs_create(struct be_adapter *adapter)
3060 {
3061         struct be_queue_info *cq;
3062         struct be_tx_obj *txo;
3063         struct be_eq_obj *eqo;
3064         int status, i;
3065
3066         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3067
3068         for_all_tx_queues(adapter, txo, i) {
3069                 cq = &txo->cq;
3070                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3071                                         sizeof(struct be_eth_tx_compl));
3072                 if (status)
3073                         return status;
3074
3075                 u64_stats_init(&txo->stats.sync);
3076                 u64_stats_init(&txo->stats.sync_compl);
3077
3078                 /* If num_evt_qs is less than num_tx_qs, then more than
3079                  * one txq shares an eq
3080                  */
3081                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3082                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3083                 if (status)
3084                         return status;
3085
3086                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3087                                         sizeof(struct be_eth_wrb));
3088                 if (status)
3089                         return status;
3090
3091                 status = be_cmd_txq_create(adapter, txo);
3092                 if (status)
3093                         return status;
3094
3095                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3096                                     eqo->idx);
3097         }
3098
3099         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3100                  adapter->num_tx_qs);
3101         return 0;
3102 }
3103
3104 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3105 {
3106         struct be_queue_info *q;
3107         struct be_rx_obj *rxo;
3108         int i;
3109
3110         for_all_rx_queues(adapter, rxo, i) {
3111                 q = &rxo->cq;
3112                 if (q->created)
3113                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3114                 be_queue_free(adapter, q);
3115         }
3116 }
3117
3118 static int be_rx_cqs_create(struct be_adapter *adapter)
3119 {
3120         struct be_queue_info *eq, *cq;
3121         struct be_rx_obj *rxo;
3122         int rc, i;
3123
3124         adapter->num_rss_qs =
3125                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3126
3127         /* We'll use RSS only if at least 2 RSS rings are supported. */
3128         if (adapter->num_rss_qs < 2)
3129                 adapter->num_rss_qs = 0;
3130
3131         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3132
3133         /* When the interface is not capable of RSS rings (and there is no
3134          * need to create a default RXQ), we'll still need one RXQ
3135          */
3136         if (adapter->num_rx_qs == 0)
3137                 adapter->num_rx_qs = 1;
3138
3139         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3140         for_all_rx_queues(adapter, rxo, i) {
3141                 rxo->adapter = adapter;
3142                 cq = &rxo->cq;
3143                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3144                                     sizeof(struct be_eth_rx_compl));
3145                 if (rc)
3146                         return rc;
3147
3148                 u64_stats_init(&rxo->stats.sync);
3149                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3150                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3151                 if (rc)
3152                         return rc;
3153         }
3154
3155         dev_info(&adapter->pdev->dev,
3156                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3157         return 0;
3158 }
3159
3160 static irqreturn_t be_intx(int irq, void *dev)
3161 {
3162         struct be_eq_obj *eqo = dev;
3163         struct be_adapter *adapter = eqo->adapter;
3164         int num_evts = 0;
3165
3166         /* IRQ is not expected when NAPI is scheduled as the EQ
3167          * will not be armed.
3168          * But, this can happen on Lancer INTx where it takes
3169          * a while to de-assert INTx or in BE2 where occasionally
3170          * an interrupt may be raised even when EQ is unarmed.
3171          * If NAPI is already scheduled, then counting & notifying
3172          * events will orphan them.
3173          */
3174         if (napi_schedule_prep(&eqo->napi)) {
3175                 num_evts = events_get(eqo);
3176                 __napi_schedule(&eqo->napi);
3177                 if (num_evts)
3178                         eqo->spurious_intr = 0;
3179         }
3180         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3181
3182         /* Return IRQ_HANDLED only for the first spurious intr
3183          * after a valid intr to stop the kernel from branding
3184          * this irq as a bad one!
3185          */
3186         if (num_evts || eqo->spurious_intr++ == 0)
3187                 return IRQ_HANDLED;
3188         else
3189                 return IRQ_NONE;
3190 }
3191
3192 static irqreturn_t be_msix(int irq, void *dev)
3193 {
3194         struct be_eq_obj *eqo = dev;
3195
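        /* Clear the interrupt without re-arming the EQ; NAPI processes the
         * events and re-arms it when polling completes.
         */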
3196         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3197         napi_schedule(&eqo->napi);
3198         return IRQ_HANDLED;
3199 }
3200
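/* Use GRO only for error-free TCP frames whose L4 checksum was verified by HW */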
3201 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3202 {
3203         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3204 }
3205
3206 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3207                          int budget)
3208 {
3209         struct be_adapter *adapter = rxo->adapter;
3210         struct be_queue_info *rx_cq = &rxo->cq;
3211         struct be_rx_compl_info *rxcp;
3212         u32 work_done;
3213         u32 frags_consumed = 0;
3214
3215         for (work_done = 0; work_done < budget; work_done++) {
3216                 rxcp = be_rx_compl_get(rxo);
3217                 if (!rxcp)
3218                         break;
3219
3220                 /* Is it a flush compl that has no data */
3221                 if (unlikely(rxcp->num_rcvd == 0))
3222                         goto loop_continue;
3223
3224                 /* Discard compl with partial DMA Lancer B0 */
3225                 if (unlikely(!rxcp->pkt_size)) {
3226                         be_rx_compl_discard(rxo, rxcp);
3227                         goto loop_continue;
3228                 }
3229
3230                 /* On BE drop pkts that arrive due to imperfect filtering in
3231                  * promiscuous mode on some SKUs
3232                  */
3233                 if (unlikely(rxcp->port != adapter->port_num &&
3234                              !lancer_chip(adapter))) {
3235                         be_rx_compl_discard(rxo, rxcp);
3236                         goto loop_continue;
3237                 }
3238
3239                 if (do_gro(rxcp))
3240                         be_rx_compl_process_gro(rxo, napi, rxcp);
3241                 else
3242                         be_rx_compl_process(rxo, napi, rxcp);
3243
3244 loop_continue:
3245                 frags_consumed += rxcp->num_rcvd;
3246                 be_rx_stats_update(rxo, rxcp);
3247         }
3248
3249         if (work_done) {
3250                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3251
3252                 /* When an rx-obj gets into post_starved state, just
3253                  * let be_worker do the posting.
3254                  */
3255                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3256                     !rxo->rx_post_starved)
3257                         be_post_rx_frags(rxo, GFP_ATOMIC,
3258                                          max_t(u32, MAX_RX_POST,
3259                                                frags_consumed));
3260         }
3261
3262         return work_done;
3263 }
3264
3265
3266 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3267                           int idx)
3268 {
3269         int num_wrbs = 0, work_done = 0;
3270         struct be_tx_compl_info *txcp;
3271
3272         while ((txcp = be_tx_compl_get(adapter, txo))) {
3273                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3274                 work_done++;
3275         }
3276
3277         if (work_done) {
3278                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3279                 atomic_sub(num_wrbs, &txo->q.used);
3280
3281                 /* As Tx wrbs have been freed up, wake up netdev queue
3282                  * if it was stopped due to lack of tx wrbs.  */
3283                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3284                     be_can_txq_wake(txo)) {
3285                         netif_wake_subqueue(adapter->netdev, idx);
3286                 }
3287
3288                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3289                 tx_stats(txo)->tx_compl += work_done;
3290                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3291         }
3292 }
3293
3294 int be_poll(struct napi_struct *napi, int budget)
3295 {
3296         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3297         struct be_adapter *adapter = eqo->adapter;
3298         int max_work = 0, work, i, num_evts;
3299         struct be_rx_obj *rxo;
3300         struct be_tx_obj *txo;
3301         u32 mult_enc = 0;
3302
3303         num_evts = events_get(eqo);
3304
3305         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3306                 be_process_tx(adapter, txo, i);
3307
3308         /* This loop will iterate twice for EQ0 in which
3309          * completions of the last RXQ (default one) are also processed.
3310          * For other EQs the loop iterates only once
3311          */
3312         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3313                 work = be_process_rx(rxo, napi, budget);
3314                 max_work = max(work, max_work);
3315         }
3316
3317         if (is_mcc_eqo(eqo))
3318                 be_process_mcc(adapter);
3319
3320         if (max_work < budget) {
3321                 napi_complete_done(napi, max_work);
3322
3323                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3324                  * delay via a delay multiplier encoding value
3325                  */
3326                 if (skyhawk_chip(adapter))
3327                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3328
3329                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3330                              mult_enc);
3331         } else {
3332                 /* As we'll continue in polling mode, count and clear events */
3333                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3334         }
3335         return max_work;
3336 }
3337
3338 void be_detect_error(struct be_adapter *adapter)
3339 {
3340         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3341         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3342         struct device *dev = &adapter->pdev->dev;
3343         u16 val;
3344         u32 i;
3345
3346         if (be_check_error(adapter, BE_ERROR_HW))
3347                 return;
3348
3349         if (lancer_chip(adapter)) {
3350                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3351                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3352                         be_set_error(adapter, BE_ERROR_UE);
3353                         sliport_err1 = ioread32(adapter->db +
3354                                                 SLIPORT_ERROR1_OFFSET);
3355                         sliport_err2 = ioread32(adapter->db +
3356                                                 SLIPORT_ERROR2_OFFSET);
3357                         /* Do not log error messages if it's a FW reset */
3358                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3359                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3360                                 dev_info(dev, "Reset is in progress\n");
3361                         } else {
3362                                 dev_err(dev, "Error detected in the card\n");
3363                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3364                                         sliport_status);
3365                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3366                                         sliport_err1);
3367                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3368                                         sliport_err2);
3369                         }
3370                 }
3371         } else {
3372                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3373                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3374                 ue_lo_mask = ioread32(adapter->pcicfg +
3375                                       PCICFG_UE_STATUS_LOW_MASK);
3376                 ue_hi_mask = ioread32(adapter->pcicfg +
3377                                       PCICFG_UE_STATUS_HI_MASK);
3378
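                /* Consider only the UE bits that FW has not masked off */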
3379                 ue_lo = (ue_lo & ~ue_lo_mask);
3380                 ue_hi = (ue_hi & ~ue_hi_mask);
3381
3382                 if (ue_lo || ue_hi) {
3383                         /* On certain platforms BE3 hardware can indicate
3384                          * spurious UEs. In case of a UE in the chip,
3385                          * the POST register correctly reports either a
3386                          * FAT_LOG_START state (FW is currently dumping
3387                          * FAT log data) or an ARMFW_UE state. Check for the
3388                          * above states to ascertain if the UE is valid or not.
3389                          */
3390                         if (BE3_chip(adapter)) {
3391                                 val = be_POST_stage_get(adapter);
3392                                 if ((val & POST_STAGE_FAT_LOG_START)
3393                                      != POST_STAGE_FAT_LOG_START &&
3394                                     (val & POST_STAGE_ARMFW_UE)
3395                                      != POST_STAGE_ARMFW_UE &&
3396                                     (val & POST_STAGE_RECOVERABLE_ERR)
3397                                      != POST_STAGE_RECOVERABLE_ERR)
3398                                         return;
3399                         }
3400
3401                         dev_err(dev, "Error detected in the adapter\n");
3402                         be_set_error(adapter, BE_ERROR_UE);
3403
3404                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3405                                 if (ue_lo & 1)
3406                                         dev_err(dev, "UE: %s bit set\n",
3407                                                 ue_status_low_desc[i]);
3408                         }
3409                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3410                                 if (ue_hi & 1)
3411                                         dev_err(dev, "UE: %s bit set\n",
3412                                                 ue_status_hi_desc[i]);
3413                         }
3414                 }
3415         }
3416 }
3417
3418 static void be_msix_disable(struct be_adapter *adapter)
3419 {
3420         if (msix_enabled(adapter)) {
3421                 pci_disable_msix(adapter->pdev);
3422                 adapter->num_msix_vec = 0;
3423                 adapter->num_msix_roce_vec = 0;
3424         }
3425 }
3426
3427 static int be_msix_enable(struct be_adapter *adapter)
3428 {
3429         unsigned int i, max_roce_eqs;
3430         struct device *dev = &adapter->pdev->dev;
3431         int num_vec;
3432
3433         /* If RoCE is supported, program the max number of vectors that
3434          * could be used for NIC and RoCE; otherwise, just program the number
3435          * we'll use initially.
3436          */
3437         if (be_roce_supported(adapter)) {
3438                 max_roce_eqs =
3439                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3440                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3441                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3442         } else {
3443                 num_vec = max(adapter->cfg_num_rx_irqs,
3444                               adapter->cfg_num_tx_irqs);
3445         }
3446
3447         for (i = 0; i < num_vec; i++)
3448                 adapter->msix_entries[i].entry = i;
3449
3450         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3451                                         MIN_MSIX_VECTORS, num_vec);
3452         if (num_vec < 0)
3453                 goto fail;
3454
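        /* If RoCE is supported, hand half of the granted vectors to RoCE;
         * the remainder is used for the NIC event queues.
         */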
3455         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3456                 adapter->num_msix_roce_vec = num_vec / 2;
3457                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3458                          adapter->num_msix_roce_vec);
3459         }
3460
3461         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3462
3463         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3464                  adapter->num_msix_vec);
3465         return 0;
3466
3467 fail:
3468         dev_warn(dev, "MSIx enable failed\n");
3469
3470         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3471         if (be_virtfn(adapter))
3472                 return num_vec;
3473         return 0;
3474 }
3475
3476 static inline int be_msix_vec_get(struct be_adapter *adapter,
3477                                   struct be_eq_obj *eqo)
3478 {
3479         return adapter->msix_entries[eqo->msix_idx].vector;
3480 }
3481
3482 static int be_msix_register(struct be_adapter *adapter)
3483 {
3484         struct net_device *netdev = adapter->netdev;
3485         struct be_eq_obj *eqo;
3486         int status, i, vec;
3487
3488         for_all_evt_queues(adapter, eqo, i) {
3489                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3490                 vec = be_msix_vec_get(adapter, eqo);
3491                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3492                 if (status)
3493                         goto err_msix;
3494
3495                 irq_update_affinity_hint(vec, eqo->affinity_mask);
3496         }
3497
3498         return 0;
3499 err_msix:
3500         for (i--; i >= 0; i--) {
3501                 eqo = &adapter->eq_obj[i];
3502                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3503         }
3504         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3505                  status);
3506         be_msix_disable(adapter);
3507         return status;
3508 }
3509
3510 static int be_irq_register(struct be_adapter *adapter)
3511 {
3512         struct net_device *netdev = adapter->netdev;
3513         int status;
3514
3515         if (msix_enabled(adapter)) {
3516                 status = be_msix_register(adapter);
3517                 if (status == 0)
3518                         goto done;
3519                 /* INTx is not supported for VF */
3520                 if (be_virtfn(adapter))
3521                         return status;
3522         }
3523
3524         /* INTx: only the first EQ is used */
3525         netdev->irq = adapter->pdev->irq;
3526         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3527                              &adapter->eq_obj[0]);
3528         if (status) {
3529                 dev_err(&adapter->pdev->dev,
3530                         "INTx request IRQ failed - err %d\n", status);
3531                 return status;
3532         }
3533 done:
3534         adapter->isr_registered = true;
3535         return 0;
3536 }
3537
3538 static void be_irq_unregister(struct be_adapter *adapter)
3539 {
3540         struct net_device *netdev = adapter->netdev;
3541         struct be_eq_obj *eqo;
3542         int i, vec;
3543
3544         if (!adapter->isr_registered)
3545                 return;
3546
3547         /* INTx */
3548         if (!msix_enabled(adapter)) {
3549                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3550                 goto done;
3551         }
3552
3553         /* MSIx */
3554         for_all_evt_queues(adapter, eqo, i) {
3555                 vec = be_msix_vec_get(adapter, eqo);
3556                 irq_update_affinity_hint(vec, NULL);
3557                 free_irq(vec, eqo);
3558         }
3559
3560 done:
3561         adapter->isr_registered = false;
3562 }
3563
3564 static void be_rx_qs_destroy(struct be_adapter *adapter)
3565 {
3566         struct rss_info *rss = &adapter->rss_info;
3567         struct be_queue_info *q;
3568         struct be_rx_obj *rxo;
3569         int i;
3570
3571         for_all_rx_queues(adapter, rxo, i) {
3572                 q = &rxo->q;
3573                 if (q->created) {
3574                         /* If RXQs are destroyed while in an "out of buffer"
3575                          * state, there is a possibility of an HW stall on
3576                          * Lancer. So, post 64 buffers to each queue to relieve
3577                          * the "out of buffer" condition.
3578                          * Make sure there's space in the RXQ before posting.
3579                          */
3580                         if (lancer_chip(adapter)) {
3581                                 be_rx_cq_clean(rxo);
3582                                 if (atomic_read(&q->used) == 0)
3583                                         be_post_rx_frags(rxo, GFP_KERNEL,
3584                                                          MAX_RX_POST);
3585                         }
3586
3587                         be_cmd_rxq_destroy(adapter, q);
3588                         be_rx_cq_clean(rxo);
3589                         be_rxq_clean(rxo);
3590                 }
3591                 be_queue_free(adapter, q);
3592         }
3593
3594         if (rss->rss_flags) {
3595                 rss->rss_flags = RSS_ENABLE_NONE;
3596                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3597                                   128, rss->rss_hkey);
3598         }
3599 }
3600
3601 static void be_disable_if_filters(struct be_adapter *adapter)
3602 {
3603         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3604         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3605             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3606                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3607                 eth_zero_addr(adapter->dev_mac);
3608         }
3609
3610         be_clear_uc_list(adapter);
3611         be_clear_mc_list(adapter);
3612
3613         /* The IFACE flags are enabled in the open path and cleared
3614          * in the close path. When a VF gets detached from the host and
3615          * assigned to a VM the following happens:
3616          *      - VF's IFACE flags get cleared in the detach path
3617          *      - IFACE create is issued by the VF in the attach path
3618          * Due to a bug in the BE3/Skyhawk-R FW
3619          * (Lancer FW doesn't have the bug), the IFACE capability flags
3620          * specified along with the IFACE create cmd issued by a VF are not
3621          * honoured by FW.  As a consequence, if a *new* driver
3622          * (that enables/disables IFACE flags in open/close)
3623          * is loaded in the host and an *old* driver is used by a VM/VF,
3624          * the IFACE gets created *without* the needed flags.
3625          * To avoid this, disable RX-filter flags only for Lancer.
3626          */
3627         if (lancer_chip(adapter)) {
3628                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3629                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3630         }
3631 }
3632
3633 static int be_close(struct net_device *netdev)
3634 {
3635         struct be_adapter *adapter = netdev_priv(netdev);
3636         struct be_eq_obj *eqo;
3637         int i;
3638
3639         /* This protection is needed as be_close() may be called even when the
3640          * adapter is in cleared state (after eeh perm failure)
3641          */
3642         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3643                 return 0;
3644
3645         /* Before attempting cleanup ensure all the pending cmds in the
3646          * config_wq have finished execution
3647          */
3648         flush_workqueue(be_wq);
3649
3650         be_disable_if_filters(adapter);
3651
3652         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3653                 for_all_evt_queues(adapter, eqo, i) {
3654                         napi_disable(&eqo->napi);
3655                 }
3656                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3657         }
3658
3659         be_async_mcc_disable(adapter);
3660
3661         /* Wait for all pending tx completions to arrive so that
3662          * all tx skbs are freed.
3663          */
3664         netif_tx_disable(netdev);
3665         be_tx_compl_clean(adapter);
3666
3667         be_rx_qs_destroy(adapter);
3668
3669         for_all_evt_queues(adapter, eqo, i) {
3670                 if (msix_enabled(adapter))
3671                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3672                 else
3673                         synchronize_irq(netdev->irq);
3674                 be_eq_clean(eqo);
3675         }
3676
3677         be_irq_unregister(adapter);
3678
3679         return 0;
3680 }
3681
3682 static int be_rx_qs_create(struct be_adapter *adapter)
3683 {
3684         struct rss_info *rss = &adapter->rss_info;
3685         u8 rss_key[RSS_HASH_KEY_LEN];
3686         struct be_rx_obj *rxo;
3687         int rc, i, j;
3688
3689         for_all_rx_queues(adapter, rxo, i) {
3690                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3691                                     sizeof(struct be_eth_rx_d));
3692                 if (rc)
3693                         return rc;
3694         }
3695
3696         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3697                 rxo = default_rxo(adapter);
3698                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3699                                        rx_frag_size, adapter->if_handle,
3700                                        false, &rxo->rss_id);
3701                 if (rc)
3702                         return rc;
3703         }
3704
3705         for_all_rss_queues(adapter, rxo, i) {
3706                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3707                                        rx_frag_size, adapter->if_handle,
3708                                        true, &rxo->rss_id);
3709                 if (rc)
3710                         return rc;
3711         }
3712
3713         if (be_multi_rxq(adapter)) {
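                /* Build the RSS indirection table by striping the RSS
                 * queue ids across all RSS_INDIR_TABLE_LEN entries.
                 */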
3714                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3715                         for_all_rss_queues(adapter, rxo, i) {
3716                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3717                                         break;
3718                                 rss->rsstable[j + i] = rxo->rss_id;
3719                                 rss->rss_queue[j + i] = i;
3720                         }
3721                 }
3722                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3723                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3724
3725                 if (!BEx_chip(adapter))
3726                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3727                                 RSS_ENABLE_UDP_IPV6;
3728
3729                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3730                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3731                                        RSS_INDIR_TABLE_LEN, rss_key);
3732                 if (rc) {
3733                         rss->rss_flags = RSS_ENABLE_NONE;
3734                         return rc;
3735                 }
3736
3737                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3738         } else {
3739                 /* Disable RSS, if only default RX Q is created */
3740                 rss->rss_flags = RSS_ENABLE_NONE;
3741         }
3742
3743
3744         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3745          * which is a queue empty condition
3746          */
3747         for_all_rx_queues(adapter, rxo, i)
3748                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3749
3750         return 0;
3751 }
3752
3753 static int be_enable_if_filters(struct be_adapter *adapter)
3754 {
3755         int status;
3756
3757         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3758         if (status)
3759                 return status;
3760
3761         /* This condition is normally true, as ->dev_mac is zeroed.
3762          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3763          * subsequent be_dev_mac_add() can fail (after fresh boot)
3764          */
3765         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3766                 int old_pmac_id = -1;
3767
3768                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3769                 if (!is_zero_ether_addr(adapter->dev_mac))
3770                         old_pmac_id = adapter->pmac_id[0];
3771
3772                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3773                 if (status)
3774                         return status;
3775
3776                 /* Delete the old programmed MAC as we successfully programmed
3777                  * a new MAC
3778                  */
3779                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3780                         be_dev_mac_del(adapter, old_pmac_id);
3781
3782                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3783         }
3784
3785         if (adapter->vlans_added)
3786                 be_vid_config(adapter);
3787
3788         __be_set_rx_mode(adapter);
3789
3790         return 0;
3791 }
3792
3793 static int be_open(struct net_device *netdev)
3794 {
3795         struct be_adapter *adapter = netdev_priv(netdev);
3796         struct be_eq_obj *eqo;
3797         struct be_rx_obj *rxo;
3798         struct be_tx_obj *txo;
3799         u8 link_status;
3800         int status, i;
3801
3802         status = be_rx_qs_create(adapter);
3803         if (status)
3804                 goto err;
3805
3806         status = be_enable_if_filters(adapter);
3807         if (status)
3808                 goto err;
3809
3810         status = be_irq_register(adapter);
3811         if (status)
3812                 goto err;
3813
3814         for_all_rx_queues(adapter, rxo, i)
3815                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3816
3817         for_all_tx_queues(adapter, txo, i)
3818                 be_cq_notify(adapter, txo->cq.id, true, 0);
3819
3820         be_async_mcc_enable(adapter);
3821
3822         for_all_evt_queues(adapter, eqo, i) {
3823                 napi_enable(&eqo->napi);
3824                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3825         }
3826         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3827
3828         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3829         if (!status)
3830                 be_link_status_update(adapter, link_status);
3831
3832         netif_tx_start_all_queues(netdev);
3833
3834         udp_tunnel_nic_reset_ntf(netdev);
3835
3836         return 0;
3837 err:
3838         be_close(adapter->netdev);
3839         return -EIO;
3840 }
3841
3842 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3843 {
3844         u32 addr;
3845
3846         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3847
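        /* Use the low 24 bits of the hash for the NIC-specific part of the
         * seed MAC; the OUI is copied from the PF MAC below.
         */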
3848         mac[5] = (u8)(addr & 0xFF);
3849         mac[4] = (u8)((addr >> 8) & 0xFF);
3850         mac[3] = (u8)((addr >> 16) & 0xFF);
3851         /* Use the OUI from the current MAC address */
3852         memcpy(mac, adapter->netdev->dev_addr, 3);
3853 }
3854
3855 /*
3856  * Generate a seed MAC address from the PF MAC Address using jhash.
3857  * MAC addresses for VFs are assigned incrementally starting from the seed.
3858  * These addresses are programmed in the ASIC by the PF and the VF driver
3859  * queries for the MAC address during its probe.
3860  */
3861 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3862 {
3863         u32 vf;
3864         int status = 0;
3865         u8 mac[ETH_ALEN];
3866         struct be_vf_cfg *vf_cfg;
3867
3868         be_vf_eth_addr_generate(adapter, mac);
3869
3870         for_all_vfs(adapter, vf_cfg, vf) {
3871                 if (BEx_chip(adapter))
3872                         status = be_cmd_pmac_add(adapter, mac,
3873                                                  vf_cfg->if_handle,
3874                                                  &vf_cfg->pmac_id, vf + 1);
3875                 else
3876                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3877                                                 vf + 1);
3878
3879                 if (status)
3880                         dev_err(&adapter->pdev->dev,
3881                                 "MAC address assignment failed for VF %d\n",
3882                                 vf);
3883                 else
3884                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3885
3886                 mac[5] += 1;
3887         }
3888         return status;
3889 }
3890
3891 static int be_vfs_mac_query(struct be_adapter *adapter)
3892 {
3893         int status, vf;
3894         u8 mac[ETH_ALEN];
3895         struct be_vf_cfg *vf_cfg;
3896
3897         for_all_vfs(adapter, vf_cfg, vf) {
3898                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3899                                                mac, vf_cfg->if_handle,
3900                                                false, vf+1);
3901                 if (status)
3902                         return status;
3903                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3904         }
3905         return 0;
3906 }
3907
3908 static void be_vf_clear(struct be_adapter *adapter)
3909 {
3910         struct be_vf_cfg *vf_cfg;
3911         u32 vf;
3912
3913         if (pci_vfs_assigned(adapter->pdev)) {
3914                 dev_warn(&adapter->pdev->dev,
3915                          "VFs are assigned to VMs: not disabling VFs\n");
3916                 goto done;
3917         }
3918
3919         pci_disable_sriov(adapter->pdev);
3920
3921         for_all_vfs(adapter, vf_cfg, vf) {
3922                 if (BEx_chip(adapter))
3923                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3924                                         vf_cfg->pmac_id, vf + 1);
3925                 else
3926                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3927                                        vf + 1);
3928
3929                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3930         }
3931
3932         if (BE3_chip(adapter))
3933                 be_cmd_set_hsw_config(adapter, 0, 0,
3934                                       adapter->if_handle,
3935                                       PORT_FWD_TYPE_PASSTHRU, 0);
3936 done:
3937         kfree(adapter->vf_cfg);
3938         adapter->num_vfs = 0;
3939         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3940 }
3941
3942 static void be_clear_queues(struct be_adapter *adapter)
3943 {
3944         be_mcc_queues_destroy(adapter);
3945         be_rx_cqs_destroy(adapter);
3946         be_tx_queues_destroy(adapter);
3947         be_evt_queues_destroy(adapter);
3948 }
3949
3950 static void be_cancel_worker(struct be_adapter *adapter)
3951 {
3952         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3953                 cancel_delayed_work_sync(&adapter->work);
3954                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3955         }
3956 }
3957
3958 static void be_cancel_err_detection(struct be_adapter *adapter)
3959 {
3960         struct be_error_recovery *err_rec = &adapter->error_recovery;
3961
3962         if (!be_err_recovery_workq)
3963                 return;
3964
3965         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3966                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3967                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3968         }
3969 }
3970
3971 /* VxLAN offload Notes:
3972  *
3973  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
3974  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
3975  * is expected to work across all types of IP tunnels once exported. Skyhawk
3976  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
3977  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
3978  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
3979  * those other tunnels are unexported on the fly through ndo_features_check().
3980  */
3981 static int be_vxlan_set_port(struct net_device *netdev, unsigned int table,
3982                              unsigned int entry, struct udp_tunnel_info *ti)
3983 {
3984         struct be_adapter *adapter = netdev_priv(netdev);
3985         struct device *dev = &adapter->pdev->dev;
3986         int status;
3987
3988         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3989                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3990         if (status) {
3991                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3992                 return status;
3993         }
3994         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3995
3996         status = be_cmd_set_vxlan_port(adapter, ti->port);
3997         if (status) {
3998                 dev_warn(dev, "Failed to add VxLAN port\n");
3999                 return status;
4000         }
4001         adapter->vxlan_port = ti->port;
4002
4003         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4004                                    NETIF_F_TSO | NETIF_F_TSO6 |
4005                                    NETIF_F_GSO_UDP_TUNNEL;
4006
4007         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4008                  be16_to_cpu(ti->port));
4009         return 0;
4010 }
4011
4012 static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
4013                                unsigned int entry, struct udp_tunnel_info *ti)
4014 {
4015         struct be_adapter *adapter = netdev_priv(netdev);
4016
4017         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4018                 be_cmd_manage_iface(adapter, adapter->if_handle,
4019                                     OP_CONVERT_TUNNEL_TO_NORMAL);
4020
4021         if (adapter->vxlan_port)
4022                 be_cmd_set_vxlan_port(adapter, 0);
4023
4024         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4025         adapter->vxlan_port = 0;
4026
4027         netdev->hw_enc_features = 0;
4028         return 0;
4029 }
4030
4031 static const struct udp_tunnel_nic_info be_udp_tunnels = {
4032         .set_port       = be_vxlan_set_port,
4033         .unset_port     = be_vxlan_unset_port,
4034         .flags          = UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
4035         .tables         = {
4036                 { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
4037         },
4038 };
4039
4040 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4041                                 struct be_resources *vft_res)
4042 {
4043         struct be_resources res = adapter->pool_res;
4044         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4045         struct be_resources res_mod = {0};
4046         u16 num_vf_qs = 1;
4047
4048         /* Distribute the queue resources among the PF and its VFs */
4049         if (num_vfs) {
4050                 /* Divide the rx queues evenly among the VFs and the PF, capped
4051                  * at VF-EQ-count. Any remainder queues belong to the PF.
4052                  */
4053                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4054                                 res.max_rss_qs / (num_vfs + 1));
4055
4056                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4057                  * RSS Tables per port. Provide RSS on VFs only if the number of
4058                  * VFs requested is less than its PF pool's RSS tables limit.
4059                  */
4060                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4061                         num_vf_qs = 1;
4062         }
4063
4064         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4065          * which are modifiable using SET_PROFILE_CONFIG cmd.
4066          */
4067         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4068                                   RESOURCE_MODIFIABLE, 0);
4069
4070         /* If RSS IFACE capability flags are modifiable for a VF, set the
4071          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4072          * more than 1 RSSQ is available for a VF.
4073          * Otherwise, provision only 1 queue pair for VF.
4074          */
4075         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4076                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4077                 if (num_vf_qs > 1) {
4078                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4079                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4080                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4081                 } else {
4082                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4083                                              BE_IF_FLAGS_DEFQ_RSS);
4084                 }
4085         } else {
4086                 num_vf_qs = 1;
4087         }
4088
4089         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4090                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4091                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4092         }
4093
4094         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4095         vft_res->max_rx_qs = num_vf_qs;
4096         vft_res->max_rss_qs = num_vf_qs;
4097         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4098         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4099
4100         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4101          * among the PF and its VFs, if the fields are changeable
4102          */
4103         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4104                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4105
4106         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4107                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4108
4109         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4110                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4111
4112         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4113                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4114 }
4115
4116 static void be_if_destroy(struct be_adapter *adapter)
4117 {
4118         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4119
4120         kfree(adapter->pmac_id);
4121         adapter->pmac_id = NULL;
4122
4123         kfree(adapter->mc_list);
4124         adapter->mc_list = NULL;
4125
4126         kfree(adapter->uc_list);
4127         adapter->uc_list = NULL;
4128 }
4129
4130 static int be_clear(struct be_adapter *adapter)
4131 {
4132         struct pci_dev *pdev = adapter->pdev;
4133         struct  be_resources vft_res = {0};
4134
4135         be_cancel_worker(adapter);
4136
4137         flush_workqueue(be_wq);
4138
4139         if (sriov_enabled(adapter))
4140                 be_vf_clear(adapter);
4141
4142         /* Re-configure FW to distribute resources evenly across max-supported
4143          * number of VFs, only when VFs are not already enabled.
4144          */
4145         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4146             !pci_vfs_assigned(pdev)) {
4147                 be_calculate_vf_res(adapter,
4148                                     pci_sriov_get_totalvfs(pdev),
4149                                     &vft_res);
4150                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4151                                         pci_sriov_get_totalvfs(pdev),
4152                                         &vft_res);
4153         }
4154
4155         be_vxlan_unset_port(adapter->netdev, 0, 0, NULL);
4156
4157         be_if_destroy(adapter);
4158
4159         be_clear_queues(adapter);
4160
4161         be_msix_disable(adapter);
4162         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4163         return 0;
4164 }
4165
4166 static int be_vfs_if_create(struct be_adapter *adapter)
4167 {
4168         struct be_resources res = {0};
4169         u32 cap_flags, en_flags, vf;
4170         struct be_vf_cfg *vf_cfg;
4171         int status;
4172
4173         /* If a FW profile exists, then cap_flags are updated */
4174         cap_flags = BE_VF_IF_EN_FLAGS;
4175
4176         for_all_vfs(adapter, vf_cfg, vf) {
4177                 if (!BE3_chip(adapter)) {
4178                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4179                                                            ACTIVE_PROFILE_TYPE,
4180                                                            RESOURCE_LIMITS,
4181                                                            vf + 1);
4182                         if (!status) {
4183                                 cap_flags = res.if_cap_flags;
4184                                 /* Prevent VFs from enabling VLAN promiscuous
4185                                  * mode
4186                                  */
4187                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4188                         }
4189                 }
4190
4191                 /* PF should enable IF flags during proxy if_create call */
4192                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4193                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4194                                           &vf_cfg->if_handle, vf + 1);
4195                 if (status)
4196                         return status;
4197         }
4198
4199         return 0;
4200 }
4201
4202 static int be_vf_setup_init(struct be_adapter *adapter)
4203 {
4204         struct be_vf_cfg *vf_cfg;
4205         int vf;
4206
4207         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4208                                   GFP_KERNEL);
4209         if (!adapter->vf_cfg)
4210                 return -ENOMEM;
4211
4212         for_all_vfs(adapter, vf_cfg, vf) {
4213                 vf_cfg->if_handle = -1;
4214                 vf_cfg->pmac_id = -1;
4215         }
4216         return 0;
4217 }
4218
4219 static int be_vf_setup(struct be_adapter *adapter)
4220 {
4221         struct device *dev = &adapter->pdev->dev;
4222         struct be_vf_cfg *vf_cfg;
4223         int status, old_vfs, vf;
4224         bool spoofchk;
4225
4226         old_vfs = pci_num_vf(adapter->pdev);
4227
4228         status = be_vf_setup_init(adapter);
4229         if (status)
4230                 goto err;
4231
4232         if (old_vfs) {
4233                 for_all_vfs(adapter, vf_cfg, vf) {
4234                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4235                         if (status)
4236                                 goto err;
4237                 }
4238
4239                 status = be_vfs_mac_query(adapter);
4240                 if (status)
4241                         goto err;
4242         } else {
4243                 status = be_vfs_if_create(adapter);
4244                 if (status)
4245                         goto err;
4246
4247                 status = be_vf_eth_addr_config(adapter);
4248                 if (status)
4249                         goto err;
4250         }
4251
4252         for_all_vfs(adapter, vf_cfg, vf) {
4253                 /* Allow VFs to program MAC/VLAN filters */
4254                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4255                                                   vf + 1);
4256                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4257                         status = be_cmd_set_fn_privileges(adapter,
4258                                                           vf_cfg->privileges |
4259                                                           BE_PRIV_FILTMGMT,
4260                                                           vf + 1);
4261                         if (!status) {
4262                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4263                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4264                                          vf);
4265                         }
4266                 }
4267
4268                 /* Allow full available bandwidth */
4269                 if (!old_vfs)
4270                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4271
4272                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4273                                                vf_cfg->if_handle, NULL,
4274                                                &spoofchk);
4275                 if (!status)
4276                         vf_cfg->spoofchk = spoofchk;
4277
4278                 if (!old_vfs) {
4279                         be_cmd_enable_vf(adapter, vf + 1);
4280                         be_cmd_set_logical_link_config(adapter,
4281                                                        IFLA_VF_LINK_STATE_AUTO,
4282                                                        vf+1);
4283                 }
4284         }
4285
4286         if (!old_vfs) {
4287                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4288                 if (status) {
4289                         dev_err(dev, "SRIOV enable failed\n");
4290                         adapter->num_vfs = 0;
4291                         goto err;
4292                 }
4293         }
4294
4295         if (BE3_chip(adapter)) {
4296                 /* On BE3, enable VEB only when SRIOV is enabled */
4297                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4298                                                adapter->if_handle,
4299                                                PORT_FWD_TYPE_VEB, 0);
4300                 if (status)
4301                         goto err;
4302         }
4303
4304         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4305         return 0;
4306 err:
4307         dev_err(dev, "VF setup failed\n");
4308         be_vf_clear(adapter);
4309         return status;
4310 }
4311
4312 /* Converting function_mode bits on BE3 to SH mc_type enums */
4313
4314 static u8 be_convert_mc_type(u32 function_mode)
4315 {
4316         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4317                 return vNIC1;
4318         else if (function_mode & QNQ_MODE)
4319                 return FLEX10;
4320         else if (function_mode & VNIC_MODE)
4321                 return vNIC2;
4322         else if (function_mode & UMC_ENABLED)
4323                 return UMC;
4324         else
4325                 return MC_NONE;
4326 }
4327
4328 /* On BE2/BE3 FW does not suggest the supported limits */
4329 static void BEx_get_resources(struct be_adapter *adapter,
4330                               struct be_resources *res)
4331 {
4332         bool use_sriov = adapter->num_vfs ? 1 : 0;
4333
4334         if (be_physfn(adapter))
4335                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4336         else
4337                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4338
4339         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4340
4341         if (be_is_mc(adapter)) {
4342                 /* Assume that there are 4 channels per port
4343                  * when multi-channel is enabled.
4344                  */
4345                 if (be_is_qnq_mode(adapter))
4346                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4347                 else
4348                         /* In a non-qnq multichannel mode, the pvid
4349                          * takes up one vlan entry
4350                          */
4351                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4352         } else {
4353                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4354         }
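             /* Worked example with hypothetical numbers: if BE_NUM_VLANS_SUPPORTED
              * were 64, a QnQ multi-channel function would get 64 / 8 = 8 VLAN
              * entries, a non-QnQ multi-channel function 64 / 4 - 1 = 15, and a
              * non-multi-channel function all 64.
              */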
4355
4356         res->max_mcast_mac = BE_MAX_MC;
4357
4358         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4359          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4360          *    *only* if it is RSS-capable.
4361          */
4362         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4363             be_virtfn(adapter) ||
4364             (be_is_mc(adapter) &&
4365              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4366                 res->max_tx_qs = 1;
4367         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4368                 struct be_resources super_nic_res = {0};
4369
4370                 /* On a SuperNIC profile, the driver needs to use the
4371                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4372                  */
4373                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4374                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4375                                           0);
4376                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4377                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4378         } else {
4379                 res->max_tx_qs = BE3_MAX_TX_QS;
4380         }
4381
4382         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4383             !use_sriov && be_physfn(adapter))
4384                 res->max_rss_qs = (adapter->be3_native) ?
4385                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4386         res->max_rx_qs = res->max_rss_qs + 1;
4387
4388         if (be_physfn(adapter))
4389                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4390                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4391         else
4392                 res->max_evt_qs = 1;
4393
4394         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4395         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4396         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4397                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4398 }
4399
4400 static void be_setup_init(struct be_adapter *adapter)
4401 {
4402         adapter->vlan_prio_bmap = 0xff;
4403         adapter->phy.link_speed = -1;
4404         adapter->if_handle = -1;
4405         adapter->be3_native = false;
4406         adapter->if_flags = 0;
4407         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4408         if (be_physfn(adapter))
4409                 adapter->cmd_privileges = MAX_PRIVILEGES;
4410         else
4411                 adapter->cmd_privileges = MIN_PRIVILEGES;
4412 }
4413
4414 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4415  * However, this HW limitation is not exposed to the host via any SLI cmd.
4416  * As a result, in the case of SRIOV and in particular multi-partition configs
4417  * the driver needs to calcuate a proportional share of RSS Tables per PF-pool
4418  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4419  * for distribution among the VFs. This self-imposed limit determines the
4420  * number of VFs for which RSS can be enabled.
4421 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4422 {
4423         struct be_port_resources port_res = {0};
4424         u8 rss_tables_on_port;
4425         u16 max_vfs = be_max_vfs(adapter);
4426
4427         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4428                                   RESOURCE_LIMITS, 0);
4429
4430         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4431
4432         /* Each PF Pool's RSS Tables limit =
4433          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4434          */
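             /* For example (hypothetical values): with 15 RSS tables left on the
              * port and this PF's pool allowed 16 of the port's 64 VFs, the pool
              * would get 16 * 15 / 64 = 3 RSS tables (integer division).
              */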
4435         adapter->pool_res.max_rss_tables =
4436                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4437 }
4438
4439 static int be_get_sriov_config(struct be_adapter *adapter)
4440 {
4441         struct be_resources res = {0};
4442         int max_vfs, old_vfs;
4443
4444         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4445                                   RESOURCE_LIMITS, 0);
4446
4447         /* Some old versions of BE3 FW don't report max_vfs value */
4448         if (BE3_chip(adapter) && !res.max_vfs) {
4449                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4450                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4451         }
4452
4453         adapter->pool_res = res;
4454
4455         /* If during previous unload of the driver, the VFs were not disabled,
4456          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4457          * Instead use the TotalVFs value stored in the pci-dev struct.
4458          */
4459         old_vfs = pci_num_vf(adapter->pdev);
4460         if (old_vfs) {
4461                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4462                          old_vfs);
4463
4464                 adapter->pool_res.max_vfs =
4465                         pci_sriov_get_totalvfs(adapter->pdev);
4466                 adapter->num_vfs = old_vfs;
4467         }
4468
4469         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4470                 be_calculate_pf_pool_rss_tables(adapter);
4471                 dev_info(&adapter->pdev->dev,
4472                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4473                          be_max_pf_pool_rss_tables(adapter));
4474         }
4475         return 0;
4476 }
4477
4478 static void be_alloc_sriov_res(struct be_adapter *adapter)
4479 {
4480         int old_vfs = pci_num_vf(adapter->pdev);
4481         struct  be_resources vft_res = {0};
4482         int status;
4483
4484         be_get_sriov_config(adapter);
4485
4486         if (!old_vfs)
4487                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4488
4489         /* When the HW is in SRIOV capable configuration, the PF-pool
4490          * resources are given to PF during driver load, if there are no
4491          * old VFs. This facility is not available in BE3 FW.
4492          * Also, this is done by FW in Lancer chip.
4493          */
4494         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4495                 be_calculate_vf_res(adapter, 0, &vft_res);
4496                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4497                                                  &vft_res);
4498                 if (status)
4499                         dev_err(&adapter->pdev->dev,
4500                                 "Failed to optimize SRIOV resources\n");
4501         }
4502 }
4503
4504 static int be_get_resources(struct be_adapter *adapter)
4505 {
4506         struct device *dev = &adapter->pdev->dev;
4507         struct be_resources res = {0};
4508         int status;
4509
4510         /* For Lancer, SH, etc. read per-function resource limits from FW.
4511          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4512          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4513          */
4514         if (BEx_chip(adapter)) {
4515                 BEx_get_resources(adapter, &res);
4516         } else {
4517                 status = be_cmd_get_func_config(adapter, &res);
4518                 if (status)
4519                         return status;
4520
4521                 /* If a default RXQ must be created, we'll use up one RSSQ */
4522                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4523                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4524                         res.max_rss_qs -= 1;
4525         }
4526
4527         /* If RoCE is supported, stash away half the EQs for RoCE */
4528         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4529                                 res.max_evt_qs / 2 : res.max_evt_qs;
4530         adapter->res = res;
4531
4532         /* If FW supports RSS default queue, then skip creating non-RSS
4533          * queue for non-IP traffic.
4534          */
4535         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4536                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4537
4538         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4539                  be_max_txqs(adapter), be_max_rxqs(adapter),
4540                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4541                  be_max_vfs(adapter));
4542         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4543                  be_max_uc(adapter), be_max_mc(adapter),
4544                  be_max_vlans(adapter));
4545
4546         /* Ensure RX and TX queues are created in pairs at init time */
4547         adapter->cfg_num_rx_irqs =
4548                                 min_t(u16, netif_get_num_default_rss_queues(),
4549                                       be_max_qp_irqs(adapter));
4550         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4551         return 0;
4552 }
4553
4554 static int be_get_config(struct be_adapter *adapter)
4555 {
4556         int status, level;
4557         u16 profile_id;
4558
4559         status = be_cmd_get_cntl_attributes(adapter);
4560         if (status)
4561                 return status;
4562
4563         status = be_cmd_query_fw_cfg(adapter);
4564         if (status)
4565                 return status;
4566
4567         if (!lancer_chip(adapter) && be_physfn(adapter))
4568                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4569
4570         if (BEx_chip(adapter)) {
4571                 level = be_cmd_get_fw_log_level(adapter);
4572                 adapter->msg_enable =
4573                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4574         }
4575
4576         be_cmd_get_acpi_wol_cap(adapter);
4577         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4578         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4579
4580         be_cmd_query_port_name(adapter);
4581
4582         if (be_physfn(adapter)) {
4583                 status = be_cmd_get_active_profile(adapter, &profile_id);
4584                 if (!status)
4585                         dev_info(&adapter->pdev->dev,
4586                                  "Using profile 0x%x\n", profile_id);
4587         }
4588
4589         return 0;
4590 }
4591
4592 static int be_mac_setup(struct be_adapter *adapter)
4593 {
4594         u8 mac[ETH_ALEN];
4595         int status;
4596
4597         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4598                 status = be_cmd_get_perm_mac(adapter, mac);
4599                 if (status)
4600                         return status;
4601
4602                 eth_hw_addr_set(adapter->netdev, mac);
4603                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4604
4605                 /* Initial MAC for BE3 VFs is already programmed by PF */
4606                 if (BEx_chip(adapter) && be_virtfn(adapter))
4607                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4608         }
4609
4610         return 0;
4611 }
4612
4613 static void be_schedule_worker(struct be_adapter *adapter)
4614 {
4615         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4616         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4617 }
4618
4619 static void be_destroy_err_recovery_workq(void)
4620 {
4621         if (!be_err_recovery_workq)
4622                 return;
4623
4624         destroy_workqueue(be_err_recovery_workq);
4625         be_err_recovery_workq = NULL;
4626 }
4627
4628 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4629 {
4630         struct be_error_recovery *err_rec = &adapter->error_recovery;
4631
4632         if (!be_err_recovery_workq)
4633                 return;
4634
4635         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4636                            msecs_to_jiffies(delay));
4637         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4638 }
4639
4640 static int be_setup_queues(struct be_adapter *adapter)
4641 {
4642         struct net_device *netdev = adapter->netdev;
4643         int status;
4644
4645         status = be_evt_queues_create(adapter);
4646         if (status)
4647                 goto err;
4648
4649         status = be_tx_qs_create(adapter);
4650         if (status)
4651                 goto err;
4652
4653         status = be_rx_cqs_create(adapter);
4654         if (status)
4655                 goto err;
4656
4657         status = be_mcc_queues_create(adapter);
4658         if (status)
4659                 goto err;
4660
4661         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4662         if (status)
4663                 goto err;
4664
4665         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4666         if (status)
4667                 goto err;
4668
4669         return 0;
4670 err:
4671         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4672         return status;
4673 }
4674
4675 static int be_if_create(struct be_adapter *adapter)
4676 {
4677         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4678         u32 cap_flags = be_if_cap_flags(adapter);
4679
4680         /* alloc required memory for other filtering fields */
4681         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4682                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4683         if (!adapter->pmac_id)
4684                 return -ENOMEM;
4685
4686         adapter->mc_list = kcalloc(be_max_mc(adapter),
4687                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4688         if (!adapter->mc_list)
4689                 return -ENOMEM;
4690
4691         adapter->uc_list = kcalloc(be_max_uc(adapter),
4692                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4693         if (!adapter->uc_list)
4694                 return -ENOMEM;
4695
4696         if (adapter->cfg_num_rx_irqs == 1)
4697                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4698
4699         en_flags &= cap_flags;
4700         /* will enable all the needed filter flags in be_open() */
4701         return be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4702                                   &adapter->if_handle, 0);
4703 }
4704
4705 int be_update_queues(struct be_adapter *adapter)
4706 {
4707         struct net_device *netdev = adapter->netdev;
4708         int status;
4709
4710         if (netif_running(netdev)) {
4711                 /* be_tx_timeout() must not run concurrently with this
4712                  * function, synchronize with an already-running dev_watchdog
4713                  */
4714                 netif_tx_lock_bh(netdev);
4715                 /* device cannot transmit now, avoid dev_watchdog timeouts */
4716                 netif_carrier_off(netdev);
4717                 netif_tx_unlock_bh(netdev);
4718
4719                 be_close(netdev);
4720         }
4721
4722         be_cancel_worker(adapter);
4723
4724         /* If any vectors have been shared with RoCE we cannot re-program
4725          * the MSIx table.
4726          */
4727         if (!adapter->num_msix_roce_vec)
4728                 be_msix_disable(adapter);
4729
4730         be_clear_queues(adapter);
4731         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4732         if (status)
4733                 return status;
4734
4735         if (!msix_enabled(adapter)) {
4736                 status = be_msix_enable(adapter);
4737                 if (status)
4738                         return status;
4739         }
4740
4741         status = be_if_create(adapter);
4742         if (status)
4743                 return status;
4744
4745         status = be_setup_queues(adapter);
4746         if (status)
4747                 return status;
4748
4749         be_schedule_worker(adapter);
4750
4751         /* The IF was destroyed and re-created. We need to clear
4752          * all promiscuous flags valid for the destroyed IF.
4753          * Without this, promisc mode is not restored during
4754          * be_open() because the driver thinks that it is
4755          * already enabled in HW.
4756          */
4757         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4758
4759         if (netif_running(netdev))
4760                 status = be_open(netdev);
4761
4762         return status;
4763 }
4764
4765 static inline int fw_major_num(const char *fw_ver)
4766 {
4767         int fw_major = 0, i;
4768
4769         i = sscanf(fw_ver, "%d.", &fw_major);
4770         if (i != 1)
4771                 return 0;
4772
4773         return fw_major;
4774 }
4775
4776 /* During error recovery, always FLR the PF.
4777  * Otherwise, FLR the PF only if no VFs are already enabled.
4778  */
4779 static bool be_reset_required(struct be_adapter *adapter)
4780 {
4781         if (be_error_recovering(adapter))
4782                 return true;
4783         else
4784                 return pci_num_vf(adapter->pdev) == 0;
4785 }
4786
4787 /* Wait for the FW to be ready and perform the required initialization */
4788 static int be_func_init(struct be_adapter *adapter)
4789 {
4790         int status;
4791
4792         status = be_fw_wait_ready(adapter);
4793         if (status)
4794                 return status;
4795
4796         /* FW is now ready; clear errors to allow cmds/doorbell */
4797         be_clear_error(adapter, BE_CLEAR_ALL);
4798
4799         if (be_reset_required(adapter)) {
4800                 status = be_cmd_reset_function(adapter);
4801                 if (status)
4802                         return status;
4803
4804                 /* Wait for interrupts to quiesce after an FLR */
4805                 msleep(100);
4806         }
4807
4808         /* Tell FW we're ready to fire cmds */
4809         status = be_cmd_fw_init(adapter);
4810         if (status)
4811                 return status;
4812
4813         /* Allow interrupts for other ULPs running on NIC function */
4814         be_intr_set(adapter, true);
4815
4816         return 0;
4817 }
4818
4819 static int be_setup(struct be_adapter *adapter)
4820 {
4821         struct device *dev = &adapter->pdev->dev;
4822         int status;
4823
4824         status = be_func_init(adapter);
4825         if (status)
4826                 return status;
4827
4828         be_setup_init(adapter);
4829
4830         if (!lancer_chip(adapter))
4831                 be_cmd_req_native_mode(adapter);
4832
4833         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4834          * for issuing profile-related cmds
4835          */
4836         if (!BEx_chip(adapter)) {
4837                 status = be_cmd_get_func_config(adapter, NULL);
4838                 if (status)
4839                         return status;
4840         }
4841
4842         status = be_get_config(adapter);
4843         if (status)
4844                 goto err;
4845
4846         if (!BE2_chip(adapter) && be_physfn(adapter))
4847                 be_alloc_sriov_res(adapter);
4848
4849         status = be_get_resources(adapter);
4850         if (status)
4851                 goto err;
4852
4853         status = be_msix_enable(adapter);
4854         if (status)
4855                 goto err;
4856
4857         /* will enable all the needed filter flags in be_open() */
4858         status = be_if_create(adapter);
4859         if (status)
4860                 goto err;
4861
4862         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4863         rtnl_lock();
4864         status = be_setup_queues(adapter);
4865         rtnl_unlock();
4866         if (status)
4867                 goto err;
4868
4869         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4870
4871         status = be_mac_setup(adapter);
4872         if (status)
4873                 goto err;
4874
4875         be_cmd_get_fw_ver(adapter);
4876         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4877
4878         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4879                 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4880                         adapter->fw_ver);
4881                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4882         }
4883
4884         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4885                                          adapter->rx_fc);
4886         if (status)
4887                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4888                                         &adapter->rx_fc);
4889
4890         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4891                  adapter->tx_fc, adapter->rx_fc);
4892
4893         if (be_physfn(adapter))
4894                 be_cmd_set_logical_link_config(adapter,
4895                                                IFLA_VF_LINK_STATE_AUTO, 0);
4896
4897         /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4898          * confusing any Linux bridge or OVS it might be connected to.
4899          * Set the EVB to PASSTHRU mode which effectively disables the EVB
4900          * when SRIOV is not enabled.
4901          */
4902         if (BE3_chip(adapter))
4903                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4904                                       PORT_FWD_TYPE_PASSTHRU, 0);
4905
4906         if (adapter->num_vfs)
4907                 be_vf_setup(adapter);
4908
4909         status = be_cmd_get_phy_info(adapter);
4910         if (!status && be_pause_supported(adapter))
4911                 adapter->phy.fc_autoneg = 1;
4912
4913         if (be_physfn(adapter) && !lancer_chip(adapter))
4914                 be_cmd_set_features(adapter);
4915
4916         be_schedule_worker(adapter);
4917         adapter->flags |= BE_FLAGS_SETUP_DONE;
4918         return 0;
4919 err:
4920         be_clear(adapter);
4921         return status;
4922 }
4923
4924 #ifdef CONFIG_NET_POLL_CONTROLLER
4925 static void be_netpoll(struct net_device *netdev)
4926 {
4927         struct be_adapter *adapter = netdev_priv(netdev);
4928         struct be_eq_obj *eqo;
4929         int i;
4930
4931         for_all_evt_queues(adapter, eqo, i) {
4932                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4933                 napi_schedule(&eqo->napi);
4934         }
4935 }
4936 #endif
4937
4938 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4939 {
4940         const struct firmware *fw;
4941         int status;
4942
4943         if (!netif_running(adapter->netdev)) {
4944                 dev_err(&adapter->pdev->dev,
4945                         "Firmware load not allowed (interface is down)\n");
4946                 return -ENETDOWN;
4947         }
4948
4949         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4950         if (status)
4951                 goto fw_exit;
4952
4953         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4954
4955         if (lancer_chip(adapter))
4956                 status = lancer_fw_download(adapter, fw);
4957         else
4958                 status = be_fw_download(adapter, fw);
4959
4960         if (!status)
4961                 be_cmd_get_fw_ver(adapter);
4962
4963 fw_exit:
4964         release_firmware(fw);
4965         return status;
4966 }
4967
4968 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4969                                  u16 flags, struct netlink_ext_ack *extack)
4970 {
4971         struct be_adapter *adapter = netdev_priv(dev);
4972         struct nlattr *attr, *br_spec;
4973         int rem;
4974         int status = 0;
4975         u16 mode = 0;
4976
4977         if (!sriov_enabled(adapter))
4978                 return -EOPNOTSUPP;
4979
4980         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4981         if (!br_spec)
4982                 return -EINVAL;
4983
4984         nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) {
4985                 mode = nla_get_u16(attr);
4986                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4987                         return -EOPNOTSUPP;
4988
4989                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4990                         return -EINVAL;
4991
4992                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4993                                                adapter->if_handle,
4994                                                mode == BRIDGE_MODE_VEPA ?
4995                                                PORT_FWD_TYPE_VEPA :
4996                                                PORT_FWD_TYPE_VEB, 0);
4997                 if (status)
4998                         goto err;
4999
5000                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
5001                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5002
5003                 return status;
5004         }
5005 err:
5006         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5007                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5008
5009         return status;
5010 }
5011
5012 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5013                                  struct net_device *dev, u32 filter_mask,
5014                                  int nlflags)
5015 {
5016         struct be_adapter *adapter = netdev_priv(dev);
5017         int status = 0;
5018         u8 hsw_mode;
5019
5020         /* BE and Lancer chips support VEB mode only */
5021         if (BEx_chip(adapter) || lancer_chip(adapter)) {
5022                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5023                 if (!pci_sriov_get_totalvfs(adapter->pdev))
5024                         return 0;
5025                 hsw_mode = PORT_FWD_TYPE_VEB;
5026         } else {
5027                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5028                                                adapter->if_handle, &hsw_mode,
5029                                                NULL);
5030                 if (status)
5031                         return 0;
5032
5033                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5034                         return 0;
5035         }
5036
5037         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5038                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
5039                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5040                                        0, 0, nlflags, filter_mask, NULL);
5041 }
5042
5043 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5044                                          void (*func)(struct work_struct *))
5045 {
5046         struct be_cmd_work *work;
5047
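             /* GFP_ATOMIC: this helper may be called from atomic context
              * (e.g. ndo_set_rx_mode runs under the netdev address-list lock).
              */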
5048         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5049         if (!work) {
5050                 dev_err(&adapter->pdev->dev,
5051                         "be_work memory allocation failed\n");
5052                 return NULL;
5053         }
5054
5055         INIT_WORK(&work->work, func);
5056         work->adapter = adapter;
5057         return work;
5058 }
5059
5060 static netdev_features_t be_features_check(struct sk_buff *skb,
5061                                            struct net_device *dev,
5062                                            netdev_features_t features)
5063 {
5064         struct be_adapter *adapter = netdev_priv(dev);
5065         u8 l4_hdr = 0;
5066
5067         if (skb_is_gso(skb)) {
5068                 /* IPv6 TSO requests with extension hdrs are a problem
5069                  * for Lancer and BE3 HW. Disable the TSO6 feature.
5070                  */
5071                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5072                         features &= ~NETIF_F_TSO6;
5073
5074                 /* Lancer cannot handle packets with an MSS smaller than 256,
5075                  * nor a TSO packet with a single segment.
5076                  * Disable GSO support in such cases.
5077                  */
5078                 if (lancer_chip(adapter) &&
5079                     (skb_shinfo(skb)->gso_size < 256 ||
5080                      skb_shinfo(skb)->gso_segs == 1))
5081                         features &= ~NETIF_F_GSO_MASK;
5082         }
5083
5084         /* The code below restricts offload features for some tunneled and
5085          * Q-in-Q packets.
5086          * Offload features for normal (non tunnel) packets are unchanged.
5087          */
5088         features = vlan_features_check(skb, features);
5089         if (!skb->encapsulation ||
5090             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5091                 return features;
5092
5093         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5094          * should disable tunnel offload features if it's not a VxLAN packet,
5095          * as tunnel offloads have been enabled only for VxLAN. This is done to
5096          * as tunnel offloads have been enabled only for VxLAN. This is done to
5097          * allow other tunneled traffic like GRE to work correctly while VxLAN
5098          */
5099         switch (vlan_get_protocol(skb)) {
5100         case htons(ETH_P_IP):
5101                 l4_hdr = ip_hdr(skb)->protocol;
5102                 break;
5103         case htons(ETH_P_IPV6):
5104                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5105                 break;
5106         default:
5107                 return features;
5108         }
5109
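             /* Keep tunnel offloads only for a well-formed VxLAN frame: inner UDP,
              * an Ethernet payload (TEB), a VXLAN-sized tunnel header immediately
              * after the UDP header, and a destination port matching the
              * configured VxLAN port.
              */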
5110         if (l4_hdr != IPPROTO_UDP ||
5111             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5112             skb->inner_protocol != htons(ETH_P_TEB) ||
5113             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5114                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5115             !adapter->vxlan_port ||
5116             udp_hdr(skb)->dest != adapter->vxlan_port)
5117                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5118
5119         return features;
5120 }
5121
5122 static int be_get_phys_port_id(struct net_device *dev,
5123                                struct netdev_phys_item_id *ppid)
5124 {
5125         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5126         struct be_adapter *adapter = netdev_priv(dev);
5127         u8 *id;
5128
5129         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5130                 return -ENOSPC;
5131
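             /* The id is the 1-based HBA port number followed by the controller
              * serial number words copied in reverse order.
              */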
5132         ppid->id[0] = adapter->hba_port_num + 1;
5133         id = &ppid->id[1];
5134         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5135              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5136                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5137
5138         ppid->id_len = id_len;
5139
5140         return 0;
5141 }
5142
5143 static void be_set_rx_mode(struct net_device *dev)
5144 {
5145         struct be_adapter *adapter = netdev_priv(dev);
5146         struct be_cmd_work *work;
5147
5148         work = be_alloc_work(adapter, be_work_set_rx_mode);
5149         if (work)
5150                 queue_work(be_wq, &work->work);
5151 }
5152
5153 static const struct net_device_ops be_netdev_ops = {
5154         .ndo_open               = be_open,
5155         .ndo_stop               = be_close,
5156         .ndo_start_xmit         = be_xmit,
5157         .ndo_set_rx_mode        = be_set_rx_mode,
5158         .ndo_set_mac_address    = be_mac_addr_set,
5159         .ndo_get_stats64        = be_get_stats64,
5160         .ndo_validate_addr      = eth_validate_addr,
5161         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5162         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5163         .ndo_set_vf_mac         = be_set_vf_mac,
5164         .ndo_set_vf_vlan        = be_set_vf_vlan,
5165         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5166         .ndo_get_vf_config      = be_get_vf_config,
5167         .ndo_set_vf_link_state  = be_set_vf_link_state,
5168         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5169         .ndo_tx_timeout         = be_tx_timeout,
5170 #ifdef CONFIG_NET_POLL_CONTROLLER
5171         .ndo_poll_controller    = be_netpoll,
5172 #endif
5173         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5174         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5175         .ndo_features_check     = be_features_check,
5176         .ndo_get_phys_port_id   = be_get_phys_port_id,
5177 };
5178
5179 static void be_netdev_init(struct net_device *netdev)
5180 {
5181         struct be_adapter *adapter = netdev_priv(netdev);
5182
5183         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5184                 NETIF_F_GSO_UDP_TUNNEL |
5185                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5186                 NETIF_F_HW_VLAN_CTAG_TX;
5187         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5188                 netdev->hw_features |= NETIF_F_RXHASH;
5189
5190         netdev->features |= netdev->hw_features |
5191                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER |
5192                 NETIF_F_HIGHDMA;
5193
5194         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5195                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5196
5197         netdev->priv_flags |= IFF_UNICAST_FLT;
5198
5199         netdev->flags |= IFF_MULTICAST;
5200
5201         netif_set_tso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5202
5203         netdev->netdev_ops = &be_netdev_ops;
5204
5205         netdev->ethtool_ops = &be_ethtool_ops;
5206
5207         if (!lancer_chip(adapter) && !BEx_chip(adapter) && !be_is_mc(adapter))
5208                 netdev->udp_tunnel_nic_info = &be_udp_tunnels;
5209
5210         /* MTU range: 256 - 9000 */
5211         netdev->min_mtu = BE_MIN_MTU;
5212         netdev->max_mtu = BE_MAX_MTU;
5213 }
5214
5215 static void be_cleanup(struct be_adapter *adapter)
5216 {
5217         struct net_device *netdev = adapter->netdev;
5218
5219         rtnl_lock();
5220         netif_device_detach(netdev);
5221         if (netif_running(netdev))
5222                 be_close(netdev);
5223         rtnl_unlock();
5224
5225         be_clear(adapter);
5226 }
5227
5228 static int be_resume(struct be_adapter *adapter)
5229 {
5230         struct net_device *netdev = adapter->netdev;
5231         int status;
5232
5233         status = be_setup(adapter);
5234         if (status)
5235                 return status;
5236
5237         rtnl_lock();
5238         if (netif_running(netdev))
5239                 status = be_open(netdev);
5240         rtnl_unlock();
5241
5242         if (status)
5243                 return status;
5244
5245         netif_device_attach(netdev);
5246
5247         return 0;
5248 }
5249
5250 static void be_soft_reset(struct be_adapter *adapter)
5251 {
5252         u32 val;
5253
5254         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5255         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5256         val |= SLIPORT_SOFTRESET_SR_MASK;
5257         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5258 }
5259
5260 static bool be_err_is_recoverable(struct be_adapter *adapter)
5261 {
5262         struct be_error_recovery *err_rec = &adapter->error_recovery;
5263         unsigned long initial_idle_time =
5264                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5265         unsigned long recovery_interval =
5266                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5267         u16 ue_err_code;
5268         u32 val;
5269
5270         val = be_POST_stage_get(adapter);
5271         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5272                 return false;
5273         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5274         if (ue_err_code == 0)
5275                 return false;
5276
5277         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5278                 ue_err_code);
5279
5280         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5281                 dev_err(&adapter->pdev->dev,
5282                         "Cannot recover within %lu sec from driver load\n",
5283                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5284                 return false;
5285         }
5286
5287         if (err_rec->last_recovery_time && time_before_eq(
5288                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5289                 dev_err(&adapter->pdev->dev,
5290                         "Cannot recover within %lu sec from last recovery\n",
5291                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5292                 return false;
5293         }
5294
5295         if (ue_err_code == err_rec->last_err_code) {
5296                 dev_err(&adapter->pdev->dev,
5297                         "Cannot recover from a consecutive TPE error\n");
5298                 return false;
5299         }
5300
5301         err_rec->last_recovery_time = jiffies;
5302         err_rec->last_err_code = ue_err_code;
5303         return true;
5304 }
5305
5306 static int be_tpe_recover(struct be_adapter *adapter)
5307 {
5308         struct be_error_recovery *err_rec = &adapter->error_recovery;
5309         int status = -EAGAIN;
5310         u32 val;
5311
5312         switch (err_rec->recovery_state) {
5313         case ERR_RECOVERY_ST_NONE:
5314                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5315                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5316                 break;
5317
5318         case ERR_RECOVERY_ST_DETECT:
5319                 val = be_POST_stage_get(adapter);
5320                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5321                     POST_STAGE_RECOVERABLE_ERR) {
5322                         dev_err(&adapter->pdev->dev,
5323                                 "Unrecoverable HW error detected: 0x%x\n", val);
5324                         status = -EINVAL;
5325                         err_rec->resched_delay = 0;
5326                         break;
5327                 }
5328
5329                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5330
5331                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5332                  * milliseconds before it checks for final error status in
5333                  * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5334                  * If it does, then PF0 initiates a Soft Reset.
5335                  */
5336                 if (adapter->pf_num == 0) {
5337                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5338                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5339                                         ERR_RECOVERY_UE_DETECT_DURATION;
5340                         break;
5341                 }
5342
5343                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5344                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5345                                         ERR_RECOVERY_UE_DETECT_DURATION;
5346                 break;
5347
5348         case ERR_RECOVERY_ST_RESET:
5349                 if (!be_err_is_recoverable(adapter)) {
5350                         dev_err(&adapter->pdev->dev,
5351                                 "Failed to meet recovery criteria\n");
5352                         status = -EIO;
5353                         err_rec->resched_delay = 0;
5354                         break;
5355                 }
5356                 be_soft_reset(adapter);
5357                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5358                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5359                                         err_rec->ue_to_reset_time;
5360                 break;
5361
5362         case ERR_RECOVERY_ST_PRE_POLL:
5363                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5364                 err_rec->resched_delay = 0;
5365                 status = 0;                     /* done */
5366                 break;
5367
5368         default:
5369                 status = -EINVAL;
5370                 err_rec->resched_delay = 0;
5371                 break;
5372         }
5373
5374         return status;
5375 }
5376
5377 static int be_err_recover(struct be_adapter *adapter)
5378 {
5379         int status;
5380
5381         if (!lancer_chip(adapter)) {
5382                 if (!adapter->error_recovery.recovery_supported ||
5383                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5384                         return -EIO;
5385                 status = be_tpe_recover(adapter);
5386                 if (status)
5387                         goto err;
5388         }
5389
5390         /* Wait for adapter to reach quiescent state before
5391          * destroying queues
5392          */
5393         status = be_fw_wait_ready(adapter);
5394         if (status)
5395                 goto err;
5396
5397         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5398
5399         be_cleanup(adapter);
5400
5401         status = be_resume(adapter);
5402         if (status)
5403                 goto err;
5404
5405         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5406
5407 err:
5408         return status;
5409 }
5410
5411 static void be_err_detection_task(struct work_struct *work)
5412 {
5413         struct be_error_recovery *err_rec =
5414                         container_of(work, struct be_error_recovery,
5415                                      err_detection_work.work);
5416         struct be_adapter *adapter =
5417                         container_of(err_rec, struct be_adapter,
5418                                      error_recovery);
5419         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5420         struct device *dev = &adapter->pdev->dev;
5421         int recovery_status;
5422
5423         be_detect_error(adapter);
5424         if (!be_check_error(adapter, BE_ERROR_HW))
5425                 goto reschedule_task;
5426
5427         recovery_status = be_err_recover(adapter);
5428         if (!recovery_status) {
5429                 err_rec->recovery_retries = 0;
5430                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5431                 dev_info(dev, "Adapter recovery successful\n");
5432                 goto reschedule_task;
5433         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5434                 /* BEx/SH recovery state machine */
5435                 if (adapter->pf_num == 0 &&
5436                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5437                         dev_err(&adapter->pdev->dev,
5438                                 "Adapter recovery in progress\n");
5439                 resched_delay = err_rec->resched_delay;
5440                 goto reschedule_task;
5441         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5442                 /* For VFs, check every second whether the PF has
5443                  * allocated resources.
5444                  */
5445                 dev_err(dev, "Re-trying adapter recovery\n");
5446                 goto reschedule_task;
5447         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5448                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5449                 /* In case of another error during recovery, it takes 30 sec
5450                  * for the adapter to come out of error. Retry error recovery after
5451                  * this time interval.
5452                  */
5453                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5454                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5455                 goto reschedule_task;
5456         } else {
5457                 dev_err(dev, "Adapter recovery failed\n");
5458                 dev_err(dev, "Please reboot server to recover\n");
5459         }
5460
5461         return;
5462
5463 reschedule_task:
5464         be_schedule_err_detection(adapter, resched_delay);
5465 }
5466
5467 static void be_log_sfp_info(struct be_adapter *adapter)
5468 {
5469         int status;
5470
5471         status = be_cmd_query_sfp_info(adapter);
5472         if (!status) {
5473                 dev_err(&adapter->pdev->dev,
5474                         "Port %c: %s Vendor: %s part no: %s",
5475                         adapter->port_name,
5476                         be_misconfig_evt_port_state[adapter->phy_state],
5477                         adapter->phy.vendor_name,
5478                         adapter->phy.vendor_pn);
5479         }
5480         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5481 }
5482
5483 static void be_worker(struct work_struct *work)
5484 {
5485         struct be_adapter *adapter =
5486                 container_of(work, struct be_adapter, work.work);
5487         struct be_rx_obj *rxo;
5488         int i;
5489
5490         if (be_physfn(adapter) &&
5491             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5492                 be_cmd_get_die_temperature(adapter);
5493
5494         /* when interrupts are not yet enabled, just reap any pending
5495          * mcc completions
5496          */
5497         if (!netif_running(adapter->netdev)) {
5498                 local_bh_disable();
5499                 be_process_mcc(adapter);
5500                 local_bh_enable();
5501                 goto reschedule;
5502         }
5503
5504         if (!adapter->stats_cmd_sent) {
5505                 if (lancer_chip(adapter))
5506                         lancer_cmd_get_pport_stats(adapter,
5507                                                    &adapter->stats_cmd);
5508                 else
5509                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5510         }
5511
5512         for_all_rx_queues(adapter, rxo, i) {
5513                 /* Replenish RX-queues starved due to memory
5514                  * allocation failures.
5515                  */
5516                 if (rxo->rx_post_starved)
5517                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5518         }
5519
5520         /* EQ-delay update for Skyhawk is done while notifying EQ */
5521         if (!skyhawk_chip(adapter))
5522                 be_eqd_update(adapter, false);
5523
5524         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5525                 be_log_sfp_info(adapter);
5526
5527 reschedule:
5528         adapter->work_counter++;
5529         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5530 }
5531
5532 static void be_unmap_pci_bars(struct be_adapter *adapter)
5533 {
5534         if (adapter->csr)
5535                 pci_iounmap(adapter->pdev, adapter->csr);
5536         if (adapter->db)
5537                 pci_iounmap(adapter->pdev, adapter->db);
5538         if (adapter->pcicfg && adapter->pcicfg_mapped)
5539                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5540 }
5541
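     /* Doorbell BAR: BAR 0 on Lancer chips and on VFs, BAR 4 on other PFs */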
5542 static int db_bar(struct be_adapter *adapter)
5543 {
5544         if (lancer_chip(adapter) || be_virtfn(adapter))
5545                 return 0;
5546         else
5547                 return 4;
5548 }
5549
5550 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5551 {
5552         if (skyhawk_chip(adapter)) {
5553                 adapter->roce_db.size = 4096;
5554                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5555                                                               db_bar(adapter));
5556                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5557                                                                db_bar(adapter));
5558         }
5559         return 0;
5560 }
5561
5562 static int be_map_pci_bars(struct be_adapter *adapter)
5563 {
5564         struct pci_dev *pdev = adapter->pdev;
5565         u8 __iomem *addr;
5566         u32 sli_intf;
5567
5568         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5569         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5570                                 SLI_INTF_FAMILY_SHIFT;
5571         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5572
5573         if (BEx_chip(adapter) && be_physfn(adapter)) {
5574                 adapter->csr = pci_iomap(pdev, 2, 0);
5575                 if (!adapter->csr)
5576                         return -ENOMEM;
5577         }
5578
5579         addr = pci_iomap(pdev, db_bar(adapter), 0);
5580         if (!addr)
5581                 goto pci_map_err;
5582         adapter->db = addr;
5583
5584         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5585                 if (be_physfn(adapter)) {
5586                         /* PCICFG is the 2nd BAR in BE2 */
5587                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5588                         if (!addr)
5589                                 goto pci_map_err;
5590                         adapter->pcicfg = addr;
5591                         adapter->pcicfg_mapped = true;
5592                 } else {
5593                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5594                         adapter->pcicfg_mapped = false;
5595                 }
5596         }
5597
5598         be_roce_map_pci_bars(adapter);
5599         return 0;
5600
5601 pci_map_err:
5602         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5603         be_unmap_pci_bars(adapter);
5604         return -ENOMEM;
5605 }
5606
5607 static void be_drv_cleanup(struct be_adapter *adapter)
5608 {
5609         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5610         struct device *dev = &adapter->pdev->dev;
5611
5612         if (mem->va)
5613                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5614
5615         mem = &adapter->rx_filter;
5616         if (mem->va)
5617                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5618
5619         mem = &adapter->stats_cmd;
5620         if (mem->va)
5621                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5622 }
5623
5624 /* Allocate and initialize various fields in be_adapter struct */
5625 static int be_drv_init(struct be_adapter *adapter)
5626 {
5627         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5628         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5629         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5630         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5631         struct device *dev = &adapter->pdev->dev;
5632         int status = 0;
5633
5634         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5635         mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5636                                                 &mbox_mem_alloc->dma,
5637                                                 GFP_KERNEL);
5638         if (!mbox_mem_alloc->va)
5639                 return -ENOMEM;
5640
5641         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5642         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5643         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
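        /* Illustrative note (not in the original source): the mailbox is
         * allocated 16 bytes larger than needed so that both the CPU and DMA
         * addresses can be rounded up to the next 16-byte boundary.  PTR_ALIGN
         * rounds up, e.g. for a hypothetical address:
         *
         *     PTR_ALIGN(0x1004, 16) == 0x1010
         *     PTR_ALIGN(0x1010, 16) == 0x1010   (already aligned)
         *
         * so the aligned mailbox always fits inside the over-allocated buffer.
         */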
5644
5645         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5646         rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5647                                            &rx_filter->dma, GFP_KERNEL);
5648         if (!rx_filter->va) {
5649                 status = -ENOMEM;
5650                 goto free_mbox;
5651         }
5652
5653         if (lancer_chip(adapter))
5654                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5655         else if (BE2_chip(adapter))
5656                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5657         else if (BE3_chip(adapter))
5658                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5659         else
5660                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5661         stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5662                                            &stats_cmd->dma, GFP_KERNEL);
5663         if (!stats_cmd->va) {
5664                 status = -ENOMEM;
5665                 goto free_rx_filter;
5666         }
5667
5668         mutex_init(&adapter->mbox_lock);
5669         mutex_init(&adapter->rx_filter_lock);
5670         spin_lock_init(&adapter->mcc_lock);
5671         spin_lock_init(&adapter->mcc_cq_lock);
5672         init_completion(&adapter->et_cmd_compl);
5673
5674         pci_save_state(adapter->pdev);
5675
5676         INIT_DELAYED_WORK(&adapter->work, be_worker);
5677
5678         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5679         adapter->error_recovery.resched_delay = 0;
5680         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5681                           be_err_detection_task);
5682
5683         adapter->rx_fc = true;
5684         adapter->tx_fc = true;
5685
5686         /* Must be a power of 2 or else MODULO will BUG_ON */
5687         adapter->be_get_temp_freq = 64;
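        /* Hedged sketch (added for illustration): the periodic worker
         * presumably gates the die-temperature query with a modulo on a tick
         * counter, e.g. a power-of-2 mask of the form
         *
         *     (work_counter & (be_get_temp_freq - 1)) == 0
         *
         * Such a mask only equals (work_counter % be_get_temp_freq) when the
         * frequency is a power of 2 -- hence the constraint above.
         */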
5688
5689         return 0;
5690
5691 free_rx_filter:
5692         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5693 free_mbox:
5694         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5695                           mbox_mem_alloc->dma);
5696         return status;
5697 }
5698
5699 static void be_remove(struct pci_dev *pdev)
5700 {
5701         struct be_adapter *adapter = pci_get_drvdata(pdev);
5702
5703         if (!adapter)
5704                 return;
5705
5706         be_roce_dev_remove(adapter);
5707         be_intr_set(adapter, false);
5708
5709         be_cancel_err_detection(adapter);
5710
5711         unregister_netdev(adapter->netdev);
5712
5713         be_clear(adapter);
5714
5715         if (!pci_vfs_assigned(adapter->pdev))
5716                 be_cmd_reset_function(adapter);
5717
5718         /* tell FW we're done issuing cmds */
5719         be_cmd_fw_clean(adapter);
5720
5721         be_unmap_pci_bars(adapter);
5722         be_drv_cleanup(adapter);
5723
5724         pci_release_regions(pdev);
5725         pci_disable_device(pdev);
5726
5727         free_netdev(adapter->netdev);
5728 }
5729
5730 static ssize_t be_hwmon_show_temp(struct device *dev,
5731                                   struct device_attribute *dev_attr,
5732                                   char *buf)
5733 {
5734         struct be_adapter *adapter = dev_get_drvdata(dev);
5735
5736         /* Unit: millidegree Celsius */
5737         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5738                 return -EIO;
5739         else
5740                 return sprintf(buf, "%u\n",
5741                                adapter->hwmon_info.be_on_die_temp * 1000);
5742 }
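
/* Usage note (illustrative, not from the original source): the value is
 * exported via the standard hwmon sysfs interface, which reports temperatures
 * in millidegrees Celsius.  Assuming the adapter registers as hwmonX, a die
 * temperature of 55 C reads back as:
 *
 *     $ cat /sys/class/hwmon/hwmonX/temp1_input
 *     55000
 */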
5743
5744 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5745                           be_hwmon_show_temp, NULL, 1);
5746
5747 static struct attribute *be_hwmon_attrs[] = {
5748         &sensor_dev_attr_temp1_input.dev_attr.attr,
5749         NULL
5750 };
5751
5752 ATTRIBUTE_GROUPS(be_hwmon);
5753
5754 static char *mc_name(struct be_adapter *adapter)
5755 {
5756         char *str = ""; /* default */
5757
5758         switch (adapter->mc_type) {
5759         case UMC:
5760                 str = "UMC";
5761                 break;
5762         case FLEX10:
5763                 str = "FLEX10";
5764                 break;
5765         case vNIC1:
5766                 str = "vNIC-1";
5767                 break;
5768         case nPAR:
5769                 str = "nPAR";
5770                 break;
5771         case UFP:
5772                 str = "UFP";
5773                 break;
5774         case vNIC2:
5775                 str = "vNIC-2";
5776                 break;
5777         default:
5778                 str = "";
5779         }
5780
5781         return str;
5782 }
5783
5784 static inline char *func_name(struct be_adapter *adapter)
5785 {
5786         return be_physfn(adapter) ? "PF" : "VF";
5787 }
5788
5789 static inline char *nic_name(struct pci_dev *pdev)
5790 {
5791         switch (pdev->device) {
5792         case OC_DEVICE_ID1:
5793                 return OC_NAME;
5794         case OC_DEVICE_ID2:
5795                 return OC_NAME_BE;
5796         case OC_DEVICE_ID3:
5797         case OC_DEVICE_ID4:
5798                 return OC_NAME_LANCER;
5799         case BE_DEVICE_ID2:
5800                 return BE3_NAME;
5801         case OC_DEVICE_ID5:
5802         case OC_DEVICE_ID6:
5803                 return OC_NAME_SH;
5804         default:
5805                 return BE_NAME;
5806         }
5807 }
5808
5809 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5810 {
5811         struct be_adapter *adapter;
5812         struct net_device *netdev;
5813         int status = 0;
5814
5815         status = pci_enable_device(pdev);
5816         if (status)
5817                 goto do_none;
5818
5819         status = pci_request_regions(pdev, DRV_NAME);
5820         if (status)
5821                 goto disable_dev;
5822         pci_set_master(pdev);
5823
5824         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5825         if (!netdev) {
5826                 status = -ENOMEM;
5827                 goto rel_reg;
5828         }
5829         adapter = netdev_priv(netdev);
5830         adapter->pdev = pdev;
5831         pci_set_drvdata(pdev, adapter);
5832         adapter->netdev = netdev;
5833         SET_NETDEV_DEV(netdev, &pdev->dev);
5834
5835         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5836         if (status) {
5837                 dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5838                 goto free_netdev;
5839         }
5840
5841         status = be_map_pci_bars(adapter);
5842         if (status)
5843                 goto free_netdev;
5844
5845         status = be_drv_init(adapter);
5846         if (status)
5847                 goto unmap_bars;
5848
5849         status = be_setup(adapter);
5850         if (status)
5851                 goto drv_cleanup;
5852
5853         be_netdev_init(netdev);
5854         status = register_netdev(netdev);
5855         if (status != 0)
5856                 goto unsetup;
5857
5858         be_roce_dev_add(adapter);
5859
5860         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5861         adapter->error_recovery.probe_time = jiffies;
5862
5863         /* On-die temperature is not supported for VFs. */
5864         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5865                 adapter->hwmon_info.hwmon_dev =
5866                         devm_hwmon_device_register_with_groups(&pdev->dev,
5867                                                                DRV_NAME,
5868                                                                adapter,
5869                                                                be_hwmon_groups);
5870                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5871         }
5872
5873         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5874                  func_name(adapter), mc_name(adapter), adapter->port_name);
5875
5876         return 0;
5877
5878 unsetup:
5879         be_clear(adapter);
5880 drv_cleanup:
5881         be_drv_cleanup(adapter);
5882 unmap_bars:
5883         be_unmap_pci_bars(adapter);
5884 free_netdev:
5885         free_netdev(netdev);
5886 rel_reg:
5887         pci_release_regions(pdev);
5888 disable_dev:
5889         pci_disable_device(pdev);
5890 do_none:
5891         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5892         return status;
5893 }
5894
5895 static int __maybe_unused be_suspend(struct device *dev_d)
5896 {
5897         struct be_adapter *adapter = dev_get_drvdata(dev_d);
5898
5899         be_intr_set(adapter, false);
5900         be_cancel_err_detection(adapter);
5901
5902         be_cleanup(adapter);
5903
5904         return 0;
5905 }
5906
5907 static int __maybe_unused be_pci_resume(struct device *dev_d)
5908 {
5909         struct be_adapter *adapter = dev_get_drvdata(dev_d);
5910         int status = 0;
5911
5912         status = be_resume(adapter);
5913         if (status)
5914                 return status;
5915
5916         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5917
5918         return 0;
5919 }
5920
5921 /*
5922  * An FLR will stop BE from DMAing any data.
5923  */
5924 static void be_shutdown(struct pci_dev *pdev)
5925 {
5926         struct be_adapter *adapter = pci_get_drvdata(pdev);
5927
5928         if (!adapter)
5929                 return;
5930
5931         be_roce_dev_shutdown(adapter);
5932         cancel_delayed_work_sync(&adapter->work);
5933         be_cancel_err_detection(adapter);
5934
5935         netif_device_detach(adapter->netdev);
5936
5937         be_cmd_reset_function(adapter);
5938
5939         pci_disable_device(pdev);
5940 }
5941
5942 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5943                                             pci_channel_state_t state)
5944 {
5945         struct be_adapter *adapter = pci_get_drvdata(pdev);
5946
5947         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5948
5949         be_roce_dev_remove(adapter);
5950
5951         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5952                 be_set_error(adapter, BE_ERROR_EEH);
5953
5954                 be_cancel_err_detection(adapter);
5955
5956                 be_cleanup(adapter);
5957         }
5958
5959         if (state == pci_channel_io_perm_failure)
5960                 return PCI_ERS_RESULT_DISCONNECT;
5961
5962         pci_disable_device(pdev);
5963
5964         /* The error could cause the FW to trigger a flash debug dump.
5965          * Resetting the card while a flash dump is in progress can cause
5966          * it not to recover, so wait for the dump to finish.
5967          * Only the first function needs to wait, since the wait is needed
5968          * just once per adapter.
5969          */
5970         if (pdev->devfn == 0)
5971                 ssleep(30);
5972
5973         return PCI_ERS_RESULT_NEED_RESET;
5974 }
5975
5976 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5977 {
5978         struct be_adapter *adapter = pci_get_drvdata(pdev);
5979         int status;
5980
5981         dev_info(&adapter->pdev->dev, "EEH reset\n");
5982
5983         status = pci_enable_device(pdev);
5984         if (status)
5985                 return PCI_ERS_RESULT_DISCONNECT;
5986
5987         pci_set_master(pdev);
5988         pci_restore_state(pdev);
5989
5990         /* Check if card is ok and fw is ready */
5991         dev_info(&adapter->pdev->dev,
5992                  "Waiting for FW to be ready after EEH reset\n");
5993         status = be_fw_wait_ready(adapter);
5994         if (status)
5995                 return PCI_ERS_RESULT_DISCONNECT;
5996
5997         be_clear_error(adapter, BE_CLEAR_ALL);
5998         return PCI_ERS_RESULT_RECOVERED;
5999 }
6000
6001 static void be_eeh_resume(struct pci_dev *pdev)
6002 {
6003         int status = 0;
6004         struct be_adapter *adapter = pci_get_drvdata(pdev);
6005
6006         dev_info(&adapter->pdev->dev, "EEH resume\n");
6007
6008         pci_save_state(pdev);
6009
6010         status = be_resume(adapter);
6011         if (status)
6012                 goto err;
6013
6014         be_roce_dev_add(adapter);
6015
6016         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6017         return;
6018 err:
6019         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6020 }
6021
6022 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6023 {
6024         struct be_adapter *adapter = pci_get_drvdata(pdev);
6025         struct be_resources vft_res = {0};
6026         int status;
6027
6028         if (!num_vfs)
6029                 be_vf_clear(adapter);
6030
6031         adapter->num_vfs = num_vfs;
6032
6033         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6034                 dev_warn(&pdev->dev,
6035                          "Cannot disable VFs while they are assigned\n");
6036                 return -EBUSY;
6037         }
6038
6039         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6040          * resources are distributed equally across the maximum number of
6041          * VFs. The user may request that only a subset of the max VFs be
6042          * enabled. Based on num_vfs, redistribute the resources across only
6043          * the enabled VFs so that each VF gets a larger share of resources.
6044          * This facility is not available in BE3 FW; on Lancer the FW
6045          * performs this redistribution itself.
6046          */
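        /* Worked example with made-up numbers (illustration only): if the PF
         * pool holds 64 RX queues and the silicon supports at most 32 VFs, the
         * default split gives each VF 2 queues; if the user enables only 8 VFs
         * via sriov_numvfs, redistributing the same pool across 8 VFs gives
         * each VF 8 queues instead.
         */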
6047         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6048                 be_calculate_vf_res(adapter, adapter->num_vfs,
6049                                     &vft_res);
6050                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6051                                                  adapter->num_vfs, &vft_res);
6052                 if (status)
6053                         dev_err(&pdev->dev,
6054                                 "Failed to optimize SR-IOV resources\n");
6055         }
6056
6057         status = be_get_resources(adapter);
6058         if (status)
6059                 return be_cmd_status(status);
6060
6061         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6062         rtnl_lock();
6063         status = be_update_queues(adapter);
6064         rtnl_unlock();
6065         if (status)
6066                 return be_cmd_status(status);
6067
6068         if (adapter->num_vfs)
6069                 status = be_vf_setup(adapter);
6070
6071         if (!status)
6072                 return adapter->num_vfs;
6073
6074         return 0;
6075 }
6076
6077 static const struct pci_error_handlers be_eeh_handlers = {
6078         .error_detected = be_eeh_err_detected,
6079         .slot_reset = be_eeh_reset,
6080         .resume = be_eeh_resume,
6081 };
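
/* Note (added for clarity): for a recoverable PCI/EEH error the core invokes
 * these callbacks in order -- .error_detected first, then .slot_reset after
 * the link/slot has been reset, and finally .resume once traffic may restart.
 * Returning PCI_ERS_RESULT_DISCONNECT from either of the first two aborts
 * recovery for this device.
 */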
6082
6083 static SIMPLE_DEV_PM_OPS(be_pci_pm_ops, be_suspend, be_pci_resume);
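
/* SIMPLE_DEV_PM_OPS builds a struct dev_pm_ops whose system-sleep callbacks
 * (suspend/resume and the hibernation freeze/thaw/poweroff/restore hooks) are
 * wired to be_suspend/be_pci_resume.  When CONFIG_PM_SLEEP is disabled those
 * entries compile away, which is why the two handlers above are marked
 * __maybe_unused.
 */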
6084
6085 static struct pci_driver be_driver = {
6086         .name = DRV_NAME,
6087         .id_table = be_dev_ids,
6088         .probe = be_probe,
6089         .remove = be_remove,
6090         .driver.pm = &be_pci_pm_ops,
6091         .shutdown = be_shutdown,
6092         .sriov_configure = be_pci_sriov_configure,
6093         .err_handler = &be_eeh_handlers
6094 };
6095
6096 static int __init be_init_module(void)
6097 {
6098         int status;
6099
6100         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6101             rx_frag_size != 2048) {
6102                 printk(KERN_WARNING DRV_NAME
6103                         " : Module param rx_frag_size must be 2048/4096/8192."
6104                         " Using 2048\n");
6105                 rx_frag_size = 2048;
6106         }
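
        /* Usage sketch (illustrative): the parameter can be set at load time,
         * e.g. assuming the module is named be2net (per DRV_NAME):
         *
         *     # modprobe be2net rx_frag_size=4096
         *
         * Any value other than 2048/4096/8192 falls back to 2048 with the
         * warning above.
         */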
6107
6108         if (num_vfs > 0) {
6109                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6110                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6111         }
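
        /* The sysfs method referred to above is the standard sriov_numvfs
         * attribute, which ends up in be_pci_sriov_configure(); e.g. with a
         * placeholder BDF:
         *
         *     # echo 4 > /sys/bus/pci/devices/0000:03:00.0/sriov_numvfs
         *     # echo 0 > /sys/bus/pci/devices/0000:03:00.0/sriov_numvfs
         */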
6112
6113         be_wq = create_singlethread_workqueue("be_wq");
6114         if (!be_wq) {
6115                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6116                 return -ENOMEM;
6117         }
6118
6119         be_err_recovery_workq =
6120                 create_singlethread_workqueue("be_err_recover");
6121         if (!be_err_recovery_workq)
6122                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6123
6124         status = pci_register_driver(&be_driver);
6125         if (status) {
6126                 destroy_workqueue(be_wq);
6127                 be_destroy_err_recovery_workq();
6128         }
6129         return status;
6130 }
6131 module_init(be_init_module);
6132
6133 static void __exit be_exit_module(void)
6134 {
6135         pci_unregister_driver(&be_driver);
6136
6137         be_destroy_err_recovery_workq();
6138
6139         if (be_wq)
6140                 destroy_workqueue(be_wq);
6141 }
6142 module_exit(be_exit_module);