net/mlx4_core: Set device configuration data to be persistent across reset
drivers/net/ethernet/mellanox/mlx4/main.c (linux-2.6-block.git)
/*
 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/kmod.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static uint8_t num_vfs[3] = {0, 0, 0};
static int num_vfs_argc;
module_param_array(num_vfs, byte, &num_vfs_argc, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
			  "num_vfs=port1,port2,port1+2");

static uint8_t probe_vf[3] = {0, 0, 0};
static int probe_vfs_argc;
module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
			   "probe_vf=port1,port2,port1+2");

int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
module_param_named(log_num_mgm_entry_size,
		   mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
					 " of qp per mcg, for example:"
					 " 10 gives 248. Range: 7 <="
					 " log_num_mgm_entry_size <= 12."
					 " To activate device managed"
					 " flow steering when available, set to -1");

static bool enable_64b_cqe_eqe = true;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
		 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");

#define PF_CONTEXT_BEHAVIOUR_MASK	(MLX4_FUNC_CAP_64B_EQE_CQE | \
					 MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
					 MLX4_FUNC_CAP_DMFS_A0_STATIC)

static char mlx4_version[] =
	DRV_NAME ": Mellanox ConnectX core driver v"
	DRV_VERSION " (" DRV_RELDATE ")\n";

static struct mlx4_profile default_profile = {
	.num_qp		= 1 << 18,
	.num_srq	= 1 << 16,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 16,
	.num_mcg	= 1 << 13,
	.num_mpt	= 1 << 19,
	.num_mtt	= 1 << 20, /* It is really num mtt segments */
};

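/* A reduced resource profile. Based on the naming and on the matching
 * MLX4_MIN_LOG_NUM_MAC/VLAN selection in mlx4_dev_cap() below, this is
 * presumably chosen instead of default_profile when
 * mlx4_low_memory_profile() is true, to keep ICM consumption small.
 */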
static struct mlx4_profile low_mem_profile = {
	.num_qp		= 1 << 17,
	.num_srq	= 1 << 6,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 8,
	.num_mcg	= 1 << 8,
	.num_mpt	= 1 << 9,
	.num_mtt	= 1 << 7,
};

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7
#define MLX4_MIN_LOG_NUM_VLANS 0
#define MLX4_MIN_LOG_NUM_MAC 1

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");

int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");

static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default "
				  "1 for IB, 2 for Ethernet");

struct mlx4_port_config {
	struct list_head list;
	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
	struct pci_dev *pdev;
};

static atomic_t pf_loading = ATOMIC_INIT(0);

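/* Validate a requested port type configuration: without the DPDP
 * (dual port, different protocol) capability all ports must carry the
 * same type, and each requested type must be in the port's supported
 * set reported by the firmware.
 */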
int mlx4_check_port_params(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_type)
{
	int i;

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
		for (i = 0; i < dev->caps.num_ports - 1; i++) {
			if (port_type[i] != port_type[i + 1]) {
				mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
				return -EINVAL;
			}
		}
	}

	for (i = 0; i < dev->caps.num_ports; i++) {
		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
			mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
				 i + 1);
			return -EINVAL;
		}
	}
	return 0;
}

static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
	int i;

	for (i = 1; i <= dev->caps.num_ports; ++i)
		dev->caps.port_mask[i] = dev->caps.port_type[i];
}

enum {
	MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
};

static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err = 0;
	struct mlx4_func func;

	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
		err = mlx4_QUERY_FUNC(dev, &func, 0);
		if (err) {
			mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
			return err;
		}
		dev_cap->max_eqs = func.max_eq;
		dev_cap->reserved_eqs = func.rsvd_eqs;
		dev_cap->reserved_uars = func.rsvd_uars;
		err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
	}
	return err;
}

static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
{
	struct mlx4_caps *dev_cap = &dev->caps;

	/* FW not supporting or cancelled by user */
	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
	    !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
		return;

	/* Must have 64B CQE_EQE enabled by FW to use bigger stride
	 * When FW has NCSI it may decide not to report 64B CQE/EQEs
	 */
	if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
	    !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		return;
	}

	if (cache_line_size() == 128 || cache_line_size() == 256) {
		mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n");
		/* Changing the real data inside CQE size to 32B */
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;

		if (mlx4_is_master(dev))
			dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
	} else {
		mlx4_dbg(dev, "Disabling CQE stride cacheLine unsupported\n");
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
	}
}

static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
			  struct mlx4_port_cap *port_cap)
{
	dev->caps.vl_cap[port] = port_cap->max_vl;
	dev->caps.ib_mtu_cap[port] = port_cap->ib_mtu;
	dev->phys_caps.gid_phys_table_len[port] = port_cap->max_gids;
	dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
	/* set gid and pkey table operating lengths by default
	 * to non-sriov values
	 */
	dev->caps.gid_table_len[port] = port_cap->max_gids;
	dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
	dev->caps.port_width_cap[port] = port_cap->max_port_width;
	dev->caps.eth_mtu_cap[port] = port_cap->eth_mtu;
	dev->caps.def_mac[port] = port_cap->def_mac;
	dev->caps.supported_type[port] = port_cap->supported_port_types;
	dev->caps.suggested_type[port] = port_cap->suggested_type;
	dev->caps.default_sense[port] = port_cap->default_sense;
	dev->caps.trans_type[port] = port_cap->trans_type;
	dev->caps.vendor_oui[port] = port_cap->vendor_oui;
	dev->caps.wavelength[port] = port_cap->wavelength;
	dev->caps.trans_code[port] = port_cap->trans_code;

	return 0;
}

static int mlx4_dev_port(struct mlx4_dev *dev, int port,
			 struct mlx4_port_cap *port_cap)
{
	int err = 0;

	err = mlx4_QUERY_PORT(dev, port, port_cap);

	if (err)
		mlx4_err(dev, "QUERY_PORT command failed.\n");

	return err;
}

#define MLX4_A0_STEERING_TABLE_SIZE	256
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err;
	int i;

	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
		return err;
	}
	mlx4_dev_cap_dump(dev, dev_cap);

	if (dev_cap->min_page_sz > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
			 dev_cap->min_page_sz, PAGE_SIZE);
		return -ENODEV;
	}
	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev_cap->num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
			 dev_cap->uar_size,
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
		return -ENODEV;
	}

	dev->caps.num_ports = dev_cap->num_ports;
	dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
	dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
				      dev->caps.num_sys_eqs :
				      MLX4_MAX_EQ_NUM;
	for (i = 1; i <= dev->caps.num_ports; ++i) {
		err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
		if (err) {
			mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
			return err;
		}
	}

	dev->caps.uar_page_size = PAGE_SIZE;
	dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE;
	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
	dev->caps.bf_reg_size = dev_cap->bf_reg_size;
	dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page;
	dev->caps.max_sq_sg = dev_cap->max_sq_sg;
	dev->caps.max_rq_sg = dev_cap->max_rq_sg;
	dev->caps.max_wqes = dev_cap->max_qp_sz;
	dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp;
	dev->caps.max_srq_wqes = dev_cap->max_srq_sz;
	dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1;
	dev->caps.reserved_srqs = dev_cap->reserved_srqs;
	dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz;
	dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz;
	/*
	 * Subtract 1 from the limit because we need to allocate a
	 * spare CQE so the HCA HW can tell the difference between an
	 * empty CQ and a full CQ.
	 */
	dev->caps.max_cqes = dev_cap->max_cq_sz - 1;
	dev->caps.reserved_cqs = dev_cap->reserved_cqs;
	dev->caps.reserved_eqs = dev_cap->reserved_eqs;
	dev->caps.reserved_mtts = dev_cap->reserved_mtts;
	dev->caps.reserved_mrws = dev_cap->reserved_mrws;

	/* The first 128 UARs are used for EQ doorbells */
	dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars);
	dev->caps.reserved_pds = dev_cap->reserved_pds;
	dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
				   dev_cap->reserved_xrcds : 0;
	dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
			      dev_cap->max_xrcds : 0;
	dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz;

	dev->caps.max_msg_sz = dev_cap->max_msg_sz;
	dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1);
	dev->caps.flags = dev_cap->flags;
	dev->caps.flags2 = dev_cap->flags2;
	dev->caps.bmme_flags = dev_cap->bmme_flags;
	dev->caps.reserved_lkey = dev_cap->reserved_lkey;
	dev->caps.stat_rate_support = dev_cap->stat_rate_support;
	dev->caps.max_gso_sz = dev_cap->max_gso_sz;
	dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz;

	/* Sense port always allowed on supported devices for ConnectX-1 and -2 */
	if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
		dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
	/* Don't do sense port on multifunction devices (for now at least) */
	if (mlx4_is_mfunc(dev))
		dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

	if (mlx4_low_memory_profile()) {
		dev->caps.log_num_macs = MLX4_MIN_LOG_NUM_MAC;
		dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
	} else {
		dev->caps.log_num_macs = log_num_mac;
		dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
	}

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
		if (dev->caps.supported_type[i]) {
			/* if only ETH is supported - assign ETH */
			if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
			/* if only IB is supported, assign IB */
			else if (dev->caps.supported_type[i] ==
				 MLX4_PORT_TYPE_IB)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
			else {
				/* if IB and ETH are supported, we set the port
				 * type according to user selection of port type;
				 * if user selected none, take the FW hint */
				if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
					dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
						MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
				else
					dev->caps.port_type[i] = port_type_array[i - 1];
			}
		}
		/*
		 * Link sensing is allowed on the port if 3 conditions are true:
		 * 1. Both protocols are supported on the port.
		 * 2. Different types are supported on the port
		 * 3. FW declared that it supports link sensing
		 */
		mlx4_priv(dev)->sense.sense_allowed[i] =
			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

		/*
		 * If "default_sense" bit is set, we move the port to "AUTO" mode
		 * and perform sense_port FW command to try and set the correct
		 * port type from beginning
		 */
		if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
			enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
			dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
			mlx4_SENSE_PORT(dev, i, &sensed_port);
			if (sensed_port != MLX4_PORT_TYPE_NONE)
				dev->caps.port_type[i] = sensed_port;
		} else {
			dev->caps.possible_type[i] = dev->caps.port_type[i];
		}

		if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
			dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
			mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_macs);
		}
		if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
			dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
			mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_vlans);
		}
	}

	dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters);

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
		(1 << dev->caps.log_num_macs) *
		(1 << dev->caps.log_num_vlans) *
		dev->caps.num_ports;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

	if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
		dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
	else
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];

	if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
		dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
		dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
	} else {
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
		dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
	}

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
		dev->caps.dmfs_high_rate_qpn_range;

	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
				 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
				 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
				 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;

	if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
		if (dev_cap->flags &
		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
		}

		if (dev_cap->flags2 &
		    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
		     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
			mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		}
	}

	if ((dev->caps.flags &
	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
	    mlx4_is_master(dev))
		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

	if (!mlx4_is_slave(dev)) {
		mlx4_enable_cqe_eqe_stride(dev);
		dev->caps.alloc_res_qp_mask =
			(dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
			MLX4_RESERVE_A0_QP;
	} else {
		dev->caps.alloc_res_qp_mask = 0;
	}

	return 0;
}

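/* Read the HCA's own PCIe link capabilities (LNKCAP/LNKCAP2) so that
 * mlx4_check_pcie_caps() below can warn when the slot trains at less
 * speed or width than the device supports.
 */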
static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
				       enum pci_bus_speed *speed,
				       enum pcie_link_width *width)
{
	u32 lnkcap1, lnkcap2;
	int err1, err2;

#define PCIE_MLW_CAP_SHIFT 4	/* start of MLW mask in link capabilities */

	*speed = PCI_SPEED_UNKNOWN;
	*width = PCIE_LNK_WIDTH_UNKNOWN;

	err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP,
					  &lnkcap1);
	err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2,
					  &lnkcap2);
	if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
		if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
			*speed = PCIE_SPEED_8_0GT;
		else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
			*speed = PCIE_SPEED_5_0GT;
		else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
			*speed = PCIE_SPEED_2_5GT;
	}
	if (!err1) {
		*width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT;
		if (!lnkcap2) { /* pre-r3.0 */
			if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB)
				*speed = PCIE_SPEED_5_0GT;
			else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB)
				*speed = PCIE_SPEED_2_5GT;
		}
	}

	if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) {
		return err1 ? err1 :
			err2 ? err2 : -EINVAL;
	}
	return 0;
}

static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
{
	enum pcie_link_width width, width_cap;
	enum pci_bus_speed speed, speed_cap;
	int err;

#define PCIE_SPEED_STR(speed) \
	(speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \
	 speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \
	 speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \
	 "Unknown")

	err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap);
	if (err) {
		mlx4_warn(dev,
			  "Unable to determine PCIe device BW capabilities\n");
		return;
	}

	err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width);
	if (err || speed == PCI_SPEED_UNKNOWN ||
	    width == PCIE_LNK_WIDTH_UNKNOWN) {
		mlx4_warn(dev,
			  "Unable to determine PCI device chain minimum BW\n");
		return;
	}

	if (width != width_cap || speed != speed_cap)
		mlx4_warn(dev,
			  "PCIe BW is different than device's capability\n");

	mlx4_info(dev, "PCIe link speed is %s, device supports %s\n",
		  PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap));
	mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n",
		  width, width_cap);
	return;
}

/* The function checks whether there are live VFs and returns their number */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_state;
	int i;
	int ret = 0;

	for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
		s_state = &priv->mfunc.master.slave_state[i];
		if (s_state->active && s_state->last_cmd !=
		    MLX4_COMM_CMD_RESET) {
			mlx4_warn(dev, "%s: slave: %d is still active\n",
				  __func__, i);
			ret++;
		}
	}
	return ret;
}

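/* Translate a proxy or tunnel special QP number into its
 * paravirtualized qkey: an offset from the start of the QPN range is
 * added to MLX4_RESERVED_QKEY_BASE; QPNs outside the proxy/tunnel SQP
 * ranges are rejected.
 */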
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
	u32 qk = MLX4_RESERVED_QKEY_BASE;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
	    qpn < dev->phys_caps.base_proxy_sqpn)
		return -EINVAL;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn)
		/* tunnel qp */
		qk += qpn - dev->phys_caps.base_tunnel_sqpn;
	else
		qk += qpn - dev->phys_caps.base_proxy_sqpn;
	*qkey = qk;
	return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->virt2phys_pkey[slave][port - 1][i] = val;
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);

void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->slave_node_guids[slave] = guid;
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);

__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return 0;

	return priv->slave_node_guids[slave];
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);

int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_slave;

	if (!mlx4_is_master(dev))
		return 0;

	s_slave = &priv->mfunc.master.slave_state[slave];
	return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

static void slave_adjust_steering_mode(struct mlx4_dev *dev,
				       struct mlx4_dev_cap *dev_cap,
				       struct mlx4_init_hca_param *hca_param)
{
	dev->caps.steering_mode = hca_param->steering_mode;
	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
		dev->caps.fs_log_max_ucast_qp_range_size =
			dev_cap->fs_log_max_ucast_qp_range_size;
	} else
		dev->caps.num_qp_per_mgm =
			4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);

	mlx4_dbg(dev, "Steering mode is: %s\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode));
}

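/* Slave (VF) capability discovery: rather than trusting QUERY_DEV_CAP
 * alone, a VF reads the PF-virtualized view via QUERY_HCA and
 * QUERY_FUNC_CAP (general, then once per port) and derives its
 * resource quotas and proxy/tunnel QP numbers from the answers.
 */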
static int mlx4_slave_cap(struct mlx4_dev *dev)
{
	int err;
	u32 page_size;
	struct mlx4_dev_cap dev_cap;
	struct mlx4_func_cap func_cap;
	struct mlx4_init_hca_param hca_param;
	u8 i;

	memset(&hca_param, 0, sizeof(hca_param));
	err = mlx4_QUERY_HCA(dev, &hca_param);
	if (err) {
		mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
		return err;
	}

	/* fail if the hca has an unknown global capability
	 * at this time global_caps should be always zeroed
	 */
	if (hca_param.global_caps) {
		mlx4_err(dev, "Unknown hca global capabilities\n");
		return -ENOSYS;
	}

	mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;

	dev->caps.hca_core_clock = hca_param.hca_core_clock;

	memset(&dev_cap, 0, sizeof(dev_cap));
	dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
	err = mlx4_dev_cap(dev, &dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
		return err;
	}

	err = mlx4_QUERY_FW(dev);
	if (err)
		mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");

	page_size = ~dev->caps.page_size_cap + 1;
	mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
	if (page_size > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
			 page_size, PAGE_SIZE);
		return -ENODEV;
	}

	/* slave gets uar page size from QUERY_HCA fw command */
	dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);

	/* TODO: relax this assumption */
	if (dev->caps.uar_page_size != PAGE_SIZE) {
		mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
			 dev->caps.uar_page_size, PAGE_SIZE);
		return -ENODEV;
	}

	memset(&func_cap, 0, sizeof(func_cap));
	err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
	if (err) {
		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
			 err);
		return err;
	}

	if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
	    PF_CONTEXT_BEHAVIOUR_MASK) {
		mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
			 func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK);
		return -ENOSYS;
	}

	dev->caps.num_ports = func_cap.num_ports;
	dev->quotas.qp = func_cap.qp_quota;
	dev->quotas.srq = func_cap.srq_quota;
	dev->quotas.cq = func_cap.cq_quota;
	dev->quotas.mpt = func_cap.mpt_quota;
	dev->quotas.mtt = func_cap.mtt_quota;
	dev->caps.num_qps = 1 << hca_param.log_num_qps;
	dev->caps.num_srqs = 1 << hca_param.log_num_srqs;
	dev->caps.num_cqs = 1 << hca_param.log_num_cqs;
	dev->caps.num_mpts = 1 << hca_param.log_mpt_sz;
	dev->caps.num_eqs = func_cap.max_eq;
	dev->caps.reserved_eqs = func_cap.reserved_eq;
	dev->caps.num_pds = MLX4_NUM_PDS;
	dev->caps.num_mgms = 0;
	dev->caps.num_amgms = 0;

	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev->caps.num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
	dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
	dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
	dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
	dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);

	if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
	    !dev->caps.qp0_qkey) {
		err = -ENOMEM;
		goto err_mem;
	}

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap);
		if (err) {
			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
				 i, err);
			goto err_mem;
		}
		dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
		dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
		dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
		dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
		dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
		dev->caps.port_mask[i] = dev->caps.port_type[i];
		dev->caps.phys_port_id[i] = func_cap.phys_port_id;
		if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
						    &dev->caps.gid_table_len[i],
						    &dev->caps.pkey_table_len[i]))
			goto err_mem;
	}

	if (dev->caps.uar_page_size * (dev->caps.num_uars -
				       dev->caps.reserved_uars) >
	    pci_resource_len(dev->persist->pdev, 2)) {
		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
			 dev->caps.uar_page_size * dev->caps.num_uars,
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
		goto err_mem;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
		dev->caps.eqe_size = 64;
		dev->caps.eqe_factor = 1;
	} else {
		dev->caps.eqe_size = 32;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
		dev->caps.cqe_size = 64;
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	} else {
		dev->caps.cqe_size = 32;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
		dev->caps.eqe_size = hca_param.eqe_size;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
		dev->caps.cqe_size = hca_param.cqe_size;
		/* User still need to know when CQE > 32B */
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	}

	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
	mlx4_warn(dev, "Timestamping is not supported in slave mode\n");

	slave_adjust_steering_mode(dev, &dev_cap, &hca_param);

	if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
	    dev->caps.bf_reg_size)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;

	if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;

	return 0;

err_mem:
	kfree(dev->caps.qp0_qkey);
	kfree(dev->caps.qp0_tunnel);
	kfree(dev->caps.qp0_proxy);
	kfree(dev->caps.qp1_tunnel);
	kfree(dev->caps.qp1_proxy);
	dev->caps.qp0_qkey = NULL;
	dev->caps.qp0_tunnel = NULL;
	dev->caps.qp0_proxy = NULL;
	dev->caps.qp1_tunnel = NULL;
	dev->caps.qp1_proxy = NULL;

	return err;
}

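/* Ask the module loader for the protocol drivers matching the
 * configured port types: mlx4_en for Ethernet ports, mlx4_ib for IB
 * ports or whenever the device reports IBoE support.
 */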
static void mlx4_request_modules(struct mlx4_dev *dev)
{
	int port;
	int has_ib_port = false;
	int has_eth_port = false;
#define EN_DRV_NAME	"mlx4_en"
#define IB_DRV_NAME	"mlx4_ib"

	for (port = 1; port <= dev->caps.num_ports; port++) {
		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
			has_ib_port = true;
		else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
			has_eth_port = true;
	}

	if (has_eth_port)
		request_module_nowait(EN_DRV_NAME);
	if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
		request_module_nowait(IB_DRV_NAME);
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_types)
{
	int err = 0;
	int change = 0;
	int port;

	for (port = 0; port < dev->caps.num_ports; port++) {
		/* Change the port type only if the new type is different
		 * from the current, and not set to Auto */
		if (port_types[port] != dev->caps.port_type[port + 1])
			change = 1;
	}
	if (change) {
		mlx4_unregister_device(dev);
		for (port = 1; port <= dev->caps.num_ports; port++) {
			mlx4_CLOSE_PORT(dev, port);
			dev->caps.port_type[port] = port_types[port - 1];
			err = mlx4_SET_PORT(dev, port, -1);
			if (err) {
				mlx4_err(dev, "Failed to set port %d, aborting\n",
					 port);
				goto out;
			}
		}
		mlx4_set_port_mask(dev);
		err = mlx4_register_device(dev);
		if (err) {
			mlx4_err(dev, "Failed to register device\n");
			goto out;
		}
		mlx4_request_modules(dev);
	}

out:
	return err;
}

static ssize_t show_port_type(struct device *dev,
			      struct device_attribute *attr,
			      char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	char type[8];

	sprintf(type, "%s",
		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
		"ib" : "eth");
	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
		sprintf(buf, "auto (%s)\n", type);
	else
		sprintf(buf, "%s\n", type);

	return strlen(buf);
}

static ssize_t set_port_type(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	enum mlx4_port_type types[MLX4_MAX_PORTS];
	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
	static DEFINE_MUTEX(set_port_type_mutex);
	int i;
	int err = 0;

	mutex_lock(&set_port_type_mutex);

	if (!strcmp(buf, "ib\n"))
		info->tmp_type = MLX4_PORT_TYPE_IB;
	else if (!strcmp(buf, "eth\n"))
		info->tmp_type = MLX4_PORT_TYPE_ETH;
	else if (!strcmp(buf, "auto\n"))
		info->tmp_type = MLX4_PORT_TYPE_AUTO;
	else {
		mlx4_err(mdev, "%s is not a supported port type\n", buf);
		err = -EINVAL;
		goto err_out;
	}

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	/* Possible type is always the one that was delivered */
	mdev->caps.possible_type[info->port] = info->tmp_type;

	for (i = 0; i < mdev->caps.num_ports; i++) {
		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
					mdev->caps.possible_type[i+1];
		if (types[i] == MLX4_PORT_TYPE_AUTO)
			types[i] = mdev->caps.port_type[i+1];
	}

	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
	    !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
		for (i = 1; i <= mdev->caps.num_ports; i++) {
			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
				err = -EINVAL;
			}
		}
	}
	if (err) {
		mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
		goto out;
	}

	mlx4_do_sense_ports(mdev, new_types, types);

	err = mlx4_check_port_params(mdev, new_types);
	if (err)
		goto out;

	/* We are about to apply the changes after the configuration
	 * was verified, no need to remember the temporary types
	 * any more */
	for (i = 0; i < mdev->caps.num_ports; i++)
		priv->port[i + 1].tmp_type = 0;

	err = mlx4_change_port_types(mdev, new_types);

out:
	mlx4_start_sense(mdev);
	mutex_unlock(&priv->port_mutex);
err_out:
	mutex_unlock(&set_port_type_mutex);

	return err ? err : count;
}

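/* IBTA encodes each supported IB MTU as a small enum value
 * (256 bytes -> 1 up to 4096 bytes -> 5); the helpers below convert
 * between that encoding and plain byte counts.
 */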
enum ibta_mtu {
	IB_MTU_256  = 1,
	IB_MTU_512  = 2,
	IB_MTU_1024 = 3,
	IB_MTU_2048 = 4,
	IB_MTU_4096 = 5
};

static inline int int_to_ibta_mtu(int mtu)
{
	switch (mtu) {
	case 256:  return IB_MTU_256;
	case 512:  return IB_MTU_512;
	case 1024: return IB_MTU_1024;
	case 2048: return IB_MTU_2048;
	case 4096: return IB_MTU_4096;
	default: return -1;
	}
}

static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
	switch (mtu) {
	case IB_MTU_256:  return 256;
	case IB_MTU_512:  return 512;
	case IB_MTU_1024: return 1024;
	case IB_MTU_2048: return 2048;
	case IB_MTU_4096: return 4096;
	default: return -1;
	}
}

static ssize_t show_port_ib_mtu(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_mtu_attr);
	struct mlx4_dev *mdev = info->dev;

	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");

	sprintf(buf, "%d\n",
		ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
	return strlen(buf);
}

static ssize_t set_port_ib_mtu(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_mtu_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	int err, port, mtu, ibta_mtu = -1;

	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
		return -EINVAL;
	}

	err = kstrtoint(buf, 0, &mtu);
	if (!err)
		ibta_mtu = int_to_ibta_mtu(mtu);

	if (err || ibta_mtu < 0) {
		mlx4_err(mdev, "%s is an invalid IBTA mtu\n", buf);
		return -EINVAL;
	}

	mdev->caps.port_ib_mtu[info->port] = ibta_mtu;

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	mlx4_unregister_device(mdev);
	for (port = 1; port <= mdev->caps.num_ports; port++) {
		mlx4_CLOSE_PORT(mdev, port);
		err = mlx4_SET_PORT(mdev, port, -1);
		if (err) {
			mlx4_err(mdev, "Failed to set port %d, aborting\n",
				 port);
			goto err_set_port;
		}
	}
	err = mlx4_register_device(mdev);
err_set_port:
	mutex_unlock(&priv->port_mutex);
	mlx4_start_sense(mdev);
	return err ? err : count;
}

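/* Pull the firmware into ICM: allocate the number of pages the FW
 * requested (priv->fw.fw_pages, filled in by QUERY_FW), hand them to
 * the device with MAP_FA, then start the firmware with RUN_FW.
 */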
static int mlx4_load_fw(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;

	priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
					 GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.fw_icm) {
		mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
	if (err) {
		mlx4_err(dev, "MAP_FA command failed, aborting\n");
		goto err_free;
	}

	err = mlx4_RUN_FW(dev);
	if (err) {
		mlx4_err(dev, "RUN_FW command failed, aborting\n");
		goto err_unmap_fa;
	}

	return 0;

err_unmap_fa:
	mlx4_UNMAP_FA(dev);

err_free:
	mlx4_free_icm(dev, priv->fw.fw_icm, 0);
	return err;
}

static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
				int cmpt_entry_sz)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;
	int num_eqs;

	err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_QP *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err)
		goto err;

	err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_SRQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err)
		goto err_qp;

	err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_CQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err)
		goto err_srq;

	num_eqs = dev->phys_caps.num_phys_eqs;
	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_EQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
	if (err)
		goto err_cq;

	return 0;

err_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);

err_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);

err_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err:
	return err;
}

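/* Map the main ICM (InterConnect Memory) context tables: ask the FW
 * how much auxiliary ICM the requested icm_size needs (SET_ICM_SIZE),
 * map that area (MAP_ICM_AUX), then lay out the cMPT, EQ, MTT, dMPT,
 * QP, CQ, SRQ and MCG tables at the offsets given by the INIT_HCA
 * parameter block.
 */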
static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
			 struct mlx4_init_hca_param *init_hca, u64 icm_size)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 aux_pages;
	int num_eqs;
	int err;

	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
	if (err) {
		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
		return err;
	}

	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
		 (unsigned long long) icm_size >> 10,
		 (unsigned long long) aux_pages << 2);

	priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
					  GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.aux_icm) {
		mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
	if (err) {
		mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
		goto err_free_aux;
	}

	err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
	if (err) {
		mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
		goto err_unmap_aux;
	}

	num_eqs = dev->phys_caps.num_phys_eqs;
	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
				  num_eqs, num_eqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
		goto err_unmap_cmpt;
	}

	/*
	 * Reserved MTT entries must be aligned up to a cacheline
	 * boundary, since the FW will write to them, while the driver
	 * writes to all other MTT entries. (The variable
	 * dev->caps.mtt_entry_sz below is really the MTT segment
	 * size, not the raw entry size)
	 */
	dev->caps.reserved_mtts =
		ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
		      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;

	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
				  init_hca->mtt_base,
				  dev->caps.mtt_entry_sz,
				  dev->caps.num_mtts,
				  dev->caps.reserved_mtts, 1, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
		goto err_unmap_eq;
	}

	err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
				  init_hca->dmpt_base,
				  dev_cap->dmpt_entry_sz,
				  dev->caps.num_mpts,
				  dev->caps.reserved_mrws, 1, 1);
	if (err) {
		mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
		goto err_unmap_mtt;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
				  init_hca->qpc_base,
				  dev_cap->qpc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map QP context memory, aborting\n");
		goto err_unmap_dmpt;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
				  init_hca->auxc_base,
				  dev_cap->aux_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
		goto err_unmap_qp;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
				  init_hca->altc_base,
				  dev_cap->altc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
		goto err_unmap_auxc;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
				  init_hca->rdmarc_base,
				  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
		goto err_unmap_altc;
	}

	err = mlx4_init_icm_table(dev, &priv->cq_table.table,
				  init_hca->cqc_base,
				  dev_cap->cqc_entry_sz,
				  dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
		goto err_unmap_rdmarc;
	}

	err = mlx4_init_icm_table(dev, &priv->srq_table.table,
				  init_hca->srqc_base,
				  dev_cap->srq_entry_sz,
				  dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
		goto err_unmap_cq;
	}

	/*
	 * For flow steering device managed mode it is required to use
	 * mlx4_init_icm_table. For B0 steering mode it's not strictly
	 * required, but for simplicity just map the whole multicast
	 * group table now. The table isn't very big and it's a lot
	 * easier than trying to track ref counts.
	 */
	err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
				  init_hca->mc_base,
				  mlx4_get_mgm_entry_size(dev),
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
		goto err_unmap_srq;
	}

	return 0;

err_unmap_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);

err_unmap_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);

err_unmap_rdmarc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);

err_unmap_altc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);

err_unmap_auxc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);

err_unmap_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);

err_unmap_dmpt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);

err_unmap_mtt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);

err_unmap_eq:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);

err_unmap_cmpt:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err_unmap_aux:
	mlx4_UNMAP_ICM_AUX(dev);

err_free_aux:
	mlx4_free_icm(dev, priv->fw.aux_icm, 0);

	return err;
}

static void mlx4_free_icms(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

	mlx4_UNMAP_ICM_AUX(dev);
	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
}

static void mlx4_slave_exit(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	mutex_lock(&priv->cmd.slave_cmd_mutex);
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
		mlx4_warn(dev, "Failed to close slave function\n");
	mutex_unlock(&priv->cmd.slave_cmd_mutex);
}

static int map_bf_area(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	resource_size_t bf_start;
	resource_size_t bf_len;
	int err = 0;

	if (!dev->caps.bf_reg_size)
		return -ENXIO;

	bf_start = pci_resource_start(dev->persist->pdev, 2) +
			(dev->caps.num_uars << PAGE_SHIFT);
	bf_len = pci_resource_len(dev->persist->pdev, 2) -
			(dev->caps.num_uars << PAGE_SHIFT);
	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
	if (!priv->bf_mapping)
		err = -ENOMEM;

	return err;
}

static void unmap_bf_area(struct mlx4_dev *dev)
{
	if (mlx4_priv(dev)->bf_mapping)
		io_mapping_free(mlx4_priv(dev)->bf_mapping);
}

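/* Read the 64-bit free-running HCA clock from its mapped BAR. The
 * high word is sampled before and after the low word; if the two
 * samples differ, the low word wrapped in between, so the read is
 * retried (up to 10 times) to avoid returning a torn value.
 */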
cycle_t mlx4_read_clock(struct mlx4_dev *dev)
{
	u32 clockhi, clocklo, clockhi1;
	cycle_t cycles;
	int i;
	struct mlx4_priv *priv = mlx4_priv(dev);

	for (i = 0; i < 10; i++) {
		clockhi = swab32(readl(priv->clock_mapping));
		clocklo = swab32(readl(priv->clock_mapping + 4));
		clockhi1 = swab32(readl(priv->clock_mapping));
		if (clockhi == clockhi1)
			break;
	}

	cycles = (u64) clockhi << 32 | (u64) clocklo;

	return cycles;
}
EXPORT_SYMBOL_GPL(mlx4_read_clock);


static int map_internal_clock(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	priv->clock_mapping =
		ioremap(pci_resource_start(dev->persist->pdev,
					   priv->fw.clock_bar) +
			priv->fw.clock_offset, MLX4_CLOCK_SIZE);

	if (!priv->clock_mapping)
		return -ENOMEM;

	return 0;
}

static void unmap_internal_clock(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (priv->clock_mapping)
		iounmap(priv->clock_mapping);
}

static void mlx4_close_hca(struct mlx4_dev *dev)
{
	unmap_internal_clock(dev);
	unmap_bf_area(dev);
	if (mlx4_is_slave(dev))
		mlx4_slave_exit(dev);
	else {
		mlx4_CLOSE_HCA(dev, 0);
		mlx4_free_icms(dev);
	}
}

static void mlx4_close_fw(struct mlx4_dev *dev)
{
	if (!mlx4_is_slave(dev)) {
		mlx4_UNMAP_FA(dev);
		mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
	}
}

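/* Bring up the VF side of the comm channel: reset the channel, verify
 * that master and slave use the same command interface revision, then
 * hand the master the 64-bit VHCR DMA address piecewise (dma >> 48,
 * >> 32, >> 16 via VHCR0..VHCR2, and the remaining low bits with
 * VHCR_EN, which also enables the channel).
 */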
1582static int mlx4_init_slave(struct mlx4_dev *dev)
1583{
1584 struct mlx4_priv *priv = mlx4_priv(dev);
1585 u64 dma = (u64) priv->mfunc.vhcr_dma;
ab9c17a0
JM
1586 int ret_from_reset = 0;
1587 u32 slave_read;
1588 u32 cmd_channel_ver;
1589
97989356 1590 if (atomic_read(&pf_loading)) {
1a91de28 1591 mlx4_warn(dev, "PF is not ready - Deferring probe\n");
97989356
AV
1592 return -EPROBE_DEFER;
1593 }
1594
f3d4c89e 1595 mutex_lock(&priv->cmd.slave_cmd_mutex);
ab9c17a0
JM
1596 priv->cmd.max_cmds = 1;
1597 mlx4_warn(dev, "Sending reset\n");
1598 ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
1599 MLX4_COMM_TIME);
1600 /* if we are in the middle of flr the slave will try
1601 * NUM_OF_RESET_RETRIES times before leaving.*/
1602 if (ret_from_reset) {
1603 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
1a91de28 1604 mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
5efe5355
JM
1605 mutex_unlock(&priv->cmd.slave_cmd_mutex);
1606 return -EPROBE_DEFER;
ab9c17a0
JM
1607 } else
1608 goto err;
1609 }
1610
1611 /* check the driver version - the slave I/F revision
1612 * must match the master's */
1613 slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
1614 cmd_channel_ver = mlx4_comm_get_version();
1615
1616 if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
1617 MLX4_COMM_GET_IF_REV(slave_read)) {
1a91de28 1618 mlx4_err(dev, "slave driver version is not supported by the master\n");
ab9c17a0
JM
1619 goto err;
1620 }
1621
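	/* Hand the 64-bit VHCR DMA address to the master 16 bits at a
	 * time, most significant word first; the final VHCR_EN command
	 * carries the low word and enables the channel.
	 */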
1622 mlx4_warn(dev, "Sending vhcr0\n");
1623 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
1624 MLX4_COMM_TIME))
1625 goto err;
1626 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
1627 MLX4_COMM_TIME))
1628 goto err;
1629 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
1630 MLX4_COMM_TIME))
1631 goto err;
1632 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
1633 goto err;
f3d4c89e
RD
1634
1635 mutex_unlock(&priv->cmd.slave_cmd_mutex);
ab9c17a0
JM
1636 return 0;
1637
1638err:
1639 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
f3d4c89e 1640 mutex_unlock(&priv->cmd.slave_cmd_mutex);
ab9c17a0 1641 return -EIO;
225c7b1f
RD
1642}
1643
6634961c
JM
1644static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
1645{
1646 int i;
1647
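	/* In multi-function mode the PF advertises only its own share
	 * of the GID table on Ethernet ports, and the last PKey entry
	 * is held back (presumably reserved for paravirtualization).
	 */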
1648 for (i = 1; i <= dev->caps.num_ports; i++) {
b6ffaeff
JM
1649 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
1650 dev->caps.gid_table_len[i] =
449fc488 1651 mlx4_get_slave_num_gids(dev, 0, i);
b6ffaeff
JM
1652 else
1653 dev->caps.gid_table_len[i] = 1;
6634961c
JM
1654 dev->caps.pkey_table_len[i] =
1655 dev->phys_caps.pkey_phys_table_len[i] - 1;
1656 }
1657}
1658
3c439b55
JM
1659static int choose_log_fs_mgm_entry_size(int qp_per_entry)
1660{
1661 int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
1662
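	/* Reading the capacity formula below: an entry of 2^i bytes is
	 * made of 16-byte chunks holding four 4-byte QPNs each, with
	 * two chunks apparently reserved for the entry header, hence
	 * 4 * ((1 << i) / 16 - 2) member QPs per entry.
	 */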
1663 for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
1664 i++) {
1665 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
1666 break;
1667 }
1668
1669 return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
1670}
1671
7d077cd3
MB
1672static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
1673{
1674 switch (dmfs_high_steer_mode) {
1675 case MLX4_STEERING_DMFS_A0_DEFAULT:
1676 return "default performance";
1677
1678 case MLX4_STEERING_DMFS_A0_DYNAMIC:
1679 return "dynamic hybrid mode";
1680
1681 case MLX4_STEERING_DMFS_A0_STATIC:
1682 return "performance optimized for limited rule configuration (static)";
1683
1684 case MLX4_STEERING_DMFS_A0_DISABLE:
1685 return "disabled performance optimized steering";
1686
1687 case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED:
1688 return "performance optimized steering not supported";
1689
1690 default:
1691 return "Unrecognized mode";
1692 }
1693}
1694
1695#define MLX4_DMFS_A0_STEERING (1UL << 2)
1696
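/* A non-positive log_num_mgm_entry_size module parameter is treated as
 * a flag word rather than a size: bit 2 (MLX4_DMFS_A0_STEERING) asks
 * for the static DMFS A0 mode checked in choose_steering_mode() below.
 */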
7b8157be
JM
1697static void choose_steering_mode(struct mlx4_dev *dev,
1698 struct mlx4_dev_cap *dev_cap)
1699{
7d077cd3
MB
1700 if (mlx4_log_num_mgm_entry_size <= 0) {
1701 if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
1702 if (dev->caps.dmfs_high_steer_mode ==
1703 MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1704 mlx4_err(dev, "DMFS high rate mode not supported\n");
1705 else
1706 dev->caps.dmfs_high_steer_mode =
1707 MLX4_STEERING_DMFS_A0_STATIC;
1708 }
1709 }
1710
1711 if (mlx4_log_num_mgm_entry_size <= 0 &&
3c439b55 1712 dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
7b8157be 1713 (!mlx4_is_mfunc(dev) ||
872bf2fb
YH
1714 (dev_cap->fs_max_num_qp_per_entry >=
1715 (dev->persist->num_vfs + 1))) &&
3c439b55
JM
1716 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
1717 MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
1718 dev->oper_log_mgm_entry_size =
1719 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
7b8157be
JM
1720 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
1721 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
1722 dev->caps.fs_log_max_ucast_qp_range_size =
1723 dev_cap->fs_log_max_ucast_qp_range_size;
1724 } else {
7d077cd3
MB
1725 if (dev->caps.dmfs_high_steer_mode !=
1726 MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1727 dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
7b8157be
JM
1728 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
1729 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1730 dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
1731 else {
1732 dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
1733
1734 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
1735 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1a91de28 1736 mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
7b8157be 1737 }
3c439b55
JM
1738 dev->oper_log_mgm_entry_size =
1739 mlx4_log_num_mgm_entry_size > 0 ?
1740 mlx4_log_num_mgm_entry_size :
1741 MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
7b8157be
JM
1742 dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
1743 }
1a91de28 1744 mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
3c439b55
JM
1745 mlx4_steering_mode_str(dev->caps.steering_mode),
1746 dev->oper_log_mgm_entry_size,
1747 mlx4_log_num_mgm_entry_size);
7b8157be
JM
1748}
1749
7ffdf726
OG
1750static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
1751 struct mlx4_dev_cap *dev_cap)
1752{
1753 if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
7d077cd3
MB
1754 dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS &&
1755 dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_STATIC)
7ffdf726
OG
1756 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
1757 else
1758 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
1759
1760 mlx4_dbg(dev, "Tunneling offload mode is: %s\n", (dev->caps.tunnel_offload_mode
1761 == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
1762}
1763
7d077cd3
MB
1764static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
1765{
1766 int i;
1767 struct mlx4_port_cap port_cap;
1768
1769 if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1770 return -EINVAL;
1771
1772 for (i = 1; i <= dev->caps.num_ports; i++) {
1773 if (mlx4_dev_port(dev, i, &port_cap)) {
1774 mlx4_err(dev,
1775 "QUERY_DEV_CAP command failed, can't veify DMFS high rate steering.\n");
1776 } else if ((dev->caps.dmfs_high_steer_mode !=
1777 MLX4_STEERING_DMFS_A0_DEFAULT) &&
1778 (port_cap.dmfs_optimized_state ==
1779 !!(dev->caps.dmfs_high_steer_mode ==
1780 MLX4_STEERING_DMFS_A0_DISABLE))) {
1781 mlx4_err(dev,
1782 "DMFS high rate steer mode differ, driver requested %s but %s in FW.\n",
1783 dmfs_high_rate_steering_mode_str(
1784 dev->caps.dmfs_high_steer_mode),
1785 (port_cap.dmfs_optimized_state ?
1786 "enabled" : "disabled"));
1787 }
1788 }
1789
1790 return 0;
1791}
1792
a0eacca9 1793static int mlx4_init_fw(struct mlx4_dev *dev)
225c7b1f 1794{
2d928651 1795 struct mlx4_mod_stat_cfg mlx4_cfg;
a0eacca9 1796 int err = 0;
225c7b1f 1797
ab9c17a0
JM
1798 if (!mlx4_is_slave(dev)) {
1799 err = mlx4_QUERY_FW(dev);
1800 if (err) {
1801 if (err == -EACCES)
1a91de28 1802 mlx4_info(dev, "non-primary physical function, skipping\n");
ab9c17a0 1803 else
1a91de28 1804 mlx4_err(dev, "QUERY_FW command failed, aborting\n");
bef772eb 1805 return err;
ab9c17a0 1806 }
225c7b1f 1807
ab9c17a0
JM
1808 err = mlx4_load_fw(dev);
1809 if (err) {
1a91de28 1810 mlx4_err(dev, "Failed to start FW, aborting\n");
bef772eb 1811 return err;
ab9c17a0 1812 }
225c7b1f 1813
ab9c17a0
JM
1814 mlx4_cfg.log_pg_sz_m = 1;
1815 mlx4_cfg.log_pg_sz = 0;
1816 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
1817 if (err)
1818 mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
a0eacca9 1819 }
2d928651 1820
a0eacca9
MB
1821 return err;
1822}
1823
1824static int mlx4_init_hca(struct mlx4_dev *dev)
1825{
1826 struct mlx4_priv *priv = mlx4_priv(dev);
1827 struct mlx4_adapter adapter;
1828 struct mlx4_dev_cap dev_cap;
1829 struct mlx4_profile profile;
1830 struct mlx4_init_hca_param init_hca;
1831 u64 icm_size;
1832 struct mlx4_config_dev_params params;
1833 int err;
1834
1835 if (!mlx4_is_slave(dev)) {
ab9c17a0
JM
1836 err = mlx4_dev_cap(dev, &dev_cap);
1837 if (err) {
1a91de28 1838 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
d0d01250 1839 return err;
ab9c17a0 1840 }
225c7b1f 1841
7b8157be 1842 choose_steering_mode(dev, &dev_cap);
7ffdf726 1843 choose_tunnel_offload_mode(dev, &dev_cap);
7b8157be 1844
7d077cd3
MB
1845 if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
1846 mlx4_is_master(dev))
1847 dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;
1848
8e1a28e8
HHZ
1849 err = mlx4_get_phys_port_id(dev);
1850 if (err)
 1851 mlx4_err(dev, "Failed to get physical port id\n");
1852
6634961c
JM
1853 if (mlx4_is_master(dev))
1854 mlx4_parav_master_pf_caps(dev);
1855
2599d858
AV
1856 if (mlx4_low_memory_profile()) {
1857 mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
1858 profile = low_mem_profile;
1859 } else {
1860 profile = default_profile;
1861 }
0ff1fb65
HHZ
1862 if (dev->caps.steering_mode ==
1863 MLX4_STEERING_MODE_DEVICE_MANAGED)
1864 profile.num_mcg = MLX4_FS_NUM_MCG;
225c7b1f 1865
ab9c17a0
JM
1866 icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
1867 &init_hca);
1868 if ((long long) icm_size < 0) {
1869 err = icm_size;
d0d01250 1870 return err;
ab9c17a0 1871 }
225c7b1f 1872
a5bbe892
EC
1873 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
1874
ab9c17a0
JM
1875 init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
1876 init_hca.uar_page_sz = PAGE_SHIFT - 12;
e448834e
SM
1877 init_hca.mw_enabled = 0;
1878 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
1879 dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
1880 init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
c1b43dca 1881
ab9c17a0
JM
1882 err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
1883 if (err)
d0d01250 1884 return err;
225c7b1f 1885
ab9c17a0
JM
1886 err = mlx4_INIT_HCA(dev, &init_hca);
1887 if (err) {
1a91de28 1888 mlx4_err(dev, "INIT_HCA command failed, aborting\n");
ab9c17a0
JM
1889 goto err_free_icm;
1890 }
7ae0e400
MB
1891
1892 if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
1893 err = mlx4_query_func(dev, &dev_cap);
1894 if (err < 0) {
1895 mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
d0d01250 1896 goto err_close;
7ae0e400
MB
1897 } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
1898 dev->caps.num_eqs = dev_cap.max_eqs;
1899 dev->caps.reserved_eqs = dev_cap.reserved_eqs;
1900 dev->caps.reserved_uars = dev_cap.reserved_uars;
1901 }
1902 }
1903
ddd8a6c1
EE
 1904 /*
 1905 * If TS is supported by the FW,
 1906 * read the HCA frequency via the QUERY_HCA command
 1907 */
1908 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
1909 memset(&init_hca, 0, sizeof(init_hca));
1910 err = mlx4_QUERY_HCA(dev, &init_hca);
1911 if (err) {
 1a91de28 1912 mlx4_err(dev, "QUERY_HCA command failed, disabling timestamping\n");
ddd8a6c1
EE
1913 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
1914 } else {
1915 dev->caps.hca_core_clock =
1916 init_hca.hca_core_clock;
1917 }
1918
1919 /* In case we got HCA frequency 0 - disable timestamping
1920 * to avoid dividing by zero
1921 */
1922 if (!dev->caps.hca_core_clock) {
1923 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
1924 mlx4_err(dev,
1a91de28 1925 "HCA frequency is 0 - timestamping is not supported\n");
ddd8a6c1
EE
1926 } else if (map_internal_clock(dev)) {
 1927 /*
 1928 * Map the internal clock;
 1929 * in case of failure, disable timestamping
 1930 */
1931 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
1a91de28 1932 mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
ddd8a6c1
EE
1933 }
1934 }
7d077cd3
MB
1935
1936 if (dev->caps.dmfs_high_steer_mode !=
1937 MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
1938 if (mlx4_validate_optimized_steering(dev))
1939 mlx4_warn(dev, "Optimized steering validation failed\n");
1940
1941 if (dev->caps.dmfs_high_steer_mode ==
1942 MLX4_STEERING_DMFS_A0_DISABLE) {
1943 dev->caps.dmfs_high_rate_qpn_base =
1944 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
1945 dev->caps.dmfs_high_rate_qpn_range =
1946 MLX4_A0_STEERING_TABLE_SIZE;
1947 }
1948
1949 mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n",
1950 dmfs_high_rate_steering_mode_str(
1951 dev->caps.dmfs_high_steer_mode));
1952 }
ab9c17a0
JM
1953 } else {
1954 err = mlx4_init_slave(dev);
1955 if (err) {
5efe5355
JM
1956 if (err != -EPROBE_DEFER)
1957 mlx4_err(dev, "Failed to initialize slave\n");
bef772eb 1958 return err;
ab9c17a0 1959 }
225c7b1f 1960
ab9c17a0
JM
1961 err = mlx4_slave_cap(dev);
1962 if (err) {
1963 mlx4_err(dev, "Failed to obtain slave caps\n");
1964 goto err_close;
1965 }
225c7b1f
RD
1966 }
1967
ab9c17a0
JM
1968 if (map_bf_area(dev))
1969 mlx4_dbg(dev, "Failed to map blue flame area\n");
1970
 1971 /* Only the master sets the ports; all the rest get them from it. */
1972 if (!mlx4_is_slave(dev))
1973 mlx4_set_port_mask(dev);
1974
225c7b1f
RD
1975 err = mlx4_QUERY_ADAPTER(dev, &adapter);
1976 if (err) {
1a91de28 1977 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
bef772eb 1978 goto unmap_bf;
225c7b1f
RD
1979 }
1980
f8c6455b
SM
1981 /* Query CONFIG_DEV parameters */
1982 err = mlx4_config_dev_retrieval(dev, &params);
1983 if (err && err != -ENOTSUPP) {
1984 mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
1985 } else if (!err) {
1986 dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
1987 dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
1988 }
225c7b1f 1989 priv->eq_table.inta_pin = adapter.inta_pin;
cd9281d8 1990 memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
225c7b1f
RD
1991
1992 return 0;
1993
bef772eb 1994unmap_bf:
ddd8a6c1 1995 unmap_internal_clock(dev);
bef772eb
AY
1996 unmap_bf_area(dev);
1997
b38f2879 1998 if (mlx4_is_slave(dev)) {
99ec41d0 1999 kfree(dev->caps.qp0_qkey);
b38f2879
DB
2000 kfree(dev->caps.qp0_tunnel);
2001 kfree(dev->caps.qp0_proxy);
2002 kfree(dev->caps.qp1_tunnel);
2003 kfree(dev->caps.qp1_proxy);
2004 }
2005
225c7b1f 2006err_close:
41929ed2
DB
2007 if (mlx4_is_slave(dev))
2008 mlx4_slave_exit(dev);
2009 else
2010 mlx4_CLOSE_HCA(dev, 0);
225c7b1f
RD
2011
2012err_free_icm:
ab9c17a0
JM
2013 if (!mlx4_is_slave(dev))
2014 mlx4_free_icms(dev);
225c7b1f 2015
225c7b1f
RD
2016 return err;
2017}
2018
f2a3f6a3
OG
2019static int mlx4_init_counters_table(struct mlx4_dev *dev)
2020{
2021 struct mlx4_priv *priv = mlx4_priv(dev);
2022 int nent;
2023
2024 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2025 return -ENOENT;
2026
2027 nent = dev->caps.max_counters;
2028 return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0);
2029}
2030
2031static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2032{
2033 mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
2034}
2035
ba062d52 2036int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
f2a3f6a3
OG
2037{
2038 struct mlx4_priv *priv = mlx4_priv(dev);
2039
2040 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2041 return -ENOENT;
2042
2043 *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
2044 if (*idx == -1)
2045 return -ENOMEM;
2046
2047 return 0;
2048}
ba062d52
JM
2049
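/* In multi-function mode a counter is reserved through the ALLOC_RES
 * firmware command, so the PF's resource tracker can account for it;
 * only the native (non-mfunc) path touches the bitmap directly.
 */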
2050int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2051{
2052 u64 out_param;
2053 int err;
2054
2055 if (mlx4_is_mfunc(dev)) {
2056 err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
2057 RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
2058 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
2059 if (!err)
2060 *idx = get_param_l(&out_param);
2061
2062 return err;
2063 }
2064 return __mlx4_counter_alloc(dev, idx);
2065}
f2a3f6a3
OG
2066EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2067
ba062d52 2068void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
f2a3f6a3 2069{
7c6d74d2 2070 mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
f2a3f6a3
OG
2071 return;
2072}
ba062d52
JM
2073
2074void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2075{
e7dbeba8 2076 u64 in_param = 0;
ba062d52
JM
2077
2078 if (mlx4_is_mfunc(dev)) {
2079 set_param_l(&in_param, idx);
2080 mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
2081 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
2082 MLX4_CMD_WRAPPED);
2083 return;
2084 }
2085 __mlx4_counter_free(dev, idx);
2086}
f2a3f6a3
OG
2087EXPORT_SYMBOL_GPL(mlx4_counter_free);
2088
3d73c288 2089static int mlx4_setup_hca(struct mlx4_dev *dev)
225c7b1f
RD
2090{
2091 struct mlx4_priv *priv = mlx4_priv(dev);
2092 int err;
7ff93f8b 2093 int port;
9a5aa622 2094 __be32 ib_port_default_caps;
225c7b1f 2095
225c7b1f
RD
2096 err = mlx4_init_uar_table(dev);
2097 if (err) {
1a91de28
JP
2098 mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
2099 return err;
225c7b1f
RD
2100 }
2101
2102 err = mlx4_uar_alloc(dev, &priv->driver_uar);
2103 if (err) {
1a91de28 2104 mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
225c7b1f
RD
2105 goto err_uar_table_free;
2106 }
2107
4979d18f 2108 priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
225c7b1f 2109 if (!priv->kar) {
1a91de28 2110 mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
225c7b1f
RD
2111 err = -ENOMEM;
2112 goto err_uar_free;
2113 }
2114
2115 err = mlx4_init_pd_table(dev);
2116 if (err) {
1a91de28 2117 mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
225c7b1f
RD
2118 goto err_kar_unmap;
2119 }
2120
012a8ff5
SH
2121 err = mlx4_init_xrcd_table(dev);
2122 if (err) {
1a91de28 2123 mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
012a8ff5
SH
2124 goto err_pd_table_free;
2125 }
2126
225c7b1f
RD
2127 err = mlx4_init_mr_table(dev);
2128 if (err) {
1a91de28 2129 mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
012a8ff5 2130 goto err_xrcd_table_free;
225c7b1f
RD
2131 }
2132
fe6f700d
YP
2133 if (!mlx4_is_slave(dev)) {
2134 err = mlx4_init_mcg_table(dev);
2135 if (err) {
1a91de28 2136 mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
fe6f700d
YP
2137 goto err_mr_table_free;
2138 }
114840c3
JM
2139 err = mlx4_config_mad_demux(dev);
2140 if (err) {
2141 mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
2142 goto err_mcg_table_free;
2143 }
fe6f700d
YP
2144 }
2145
225c7b1f
RD
2146 err = mlx4_init_eq_table(dev);
2147 if (err) {
1a91de28 2148 mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
fe6f700d 2149 goto err_mcg_table_free;
225c7b1f
RD
2150 }
2151
2152 err = mlx4_cmd_use_events(dev);
2153 if (err) {
1a91de28 2154 mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
225c7b1f
RD
2155 goto err_eq_table_free;
2156 }
2157
2158 err = mlx4_NOP(dev);
2159 if (err) {
08fb1055 2160 if (dev->flags & MLX4_FLAG_MSI_X) {
1a91de28 2161 mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n",
b8dd786f 2162 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
1a91de28 2163 mlx4_warn(dev, "Trying again without MSI-X\n");
08fb1055 2164 } else {
1a91de28 2165 mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
b8dd786f 2166 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
225c7b1f 2167 mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
08fb1055 2168 }
225c7b1f
RD
2169
2170 goto err_cmd_poll;
2171 }
2172
2173 mlx4_dbg(dev, "NOP command IRQ test passed\n");
2174
2175 err = mlx4_init_cq_table(dev);
2176 if (err) {
1a91de28 2177 mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
225c7b1f
RD
2178 goto err_cmd_poll;
2179 }
2180
2181 err = mlx4_init_srq_table(dev);
2182 if (err) {
1a91de28 2183 mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
225c7b1f
RD
2184 goto err_cq_table_free;
2185 }
2186
2187 err = mlx4_init_qp_table(dev);
2188 if (err) {
1a91de28 2189 mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
225c7b1f
RD
2190 goto err_srq_table_free;
2191 }
2192
f2a3f6a3
OG
2193 err = mlx4_init_counters_table(dev);
2194 if (err && err != -ENOENT) {
1a91de28 2195 mlx4_err(dev, "Failed to initialize counters table, aborting\n");
fe6f700d 2196 goto err_qp_table_free;
f2a3f6a3
OG
2197 }
2198
ab9c17a0
JM
2199 if (!mlx4_is_slave(dev)) {
2200 for (port = 1; port <= dev->caps.num_ports; port++) {
ab9c17a0
JM
2201 ib_port_default_caps = 0;
2202 err = mlx4_get_port_ib_caps(dev, port,
2203 &ib_port_default_caps);
2204 if (err)
1a91de28
JP
2205 mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
2206 port, err);
ab9c17a0
JM
2207 dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2208
2aca1172
JM
2209 /* initialize per-slave default ib port capabilities */
2210 if (mlx4_is_master(dev)) {
2211 int i;
2212 for (i = 0; i < dev->num_slaves; i++) {
2213 if (i == mlx4_master_func_num(dev))
2214 continue;
2215 priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
1a91de28 2216 ib_port_default_caps;
2aca1172
JM
2217 }
2218 }
2219
096335b3
OG
2220 if (mlx4_is_mfunc(dev))
2221 dev->caps.port_ib_mtu[port] = IB_MTU_2048;
2222 else
2223 dev->caps.port_ib_mtu[port] = IB_MTU_4096;
97285b78 2224
6634961c
JM
2225 err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2226 dev->caps.pkey_table_len[port] : -1);
ab9c17a0
JM
2227 if (err) {
2228 mlx4_err(dev, "Failed to set port %d, aborting\n",
1a91de28 2229 port);
ab9c17a0
JM
2230 goto err_counters_table_free;
2231 }
7ff93f8b
YP
2232 }
2233 }
2234
225c7b1f
RD
2235 return 0;
2236
f2a3f6a3
OG
2237err_counters_table_free:
2238 mlx4_cleanup_counters_table(dev);
2239
225c7b1f
RD
2240err_qp_table_free:
2241 mlx4_cleanup_qp_table(dev);
2242
2243err_srq_table_free:
2244 mlx4_cleanup_srq_table(dev);
2245
2246err_cq_table_free:
2247 mlx4_cleanup_cq_table(dev);
2248
2249err_cmd_poll:
2250 mlx4_cmd_use_polling(dev);
2251
2252err_eq_table_free:
2253 mlx4_cleanup_eq_table(dev);
2254
fe6f700d
YP
2255err_mcg_table_free:
2256 if (!mlx4_is_slave(dev))
2257 mlx4_cleanup_mcg_table(dev);
2258
ee49bd93 2259err_mr_table_free:
225c7b1f
RD
2260 mlx4_cleanup_mr_table(dev);
2261
012a8ff5
SH
2262err_xrcd_table_free:
2263 mlx4_cleanup_xrcd_table(dev);
2264
225c7b1f
RD
2265err_pd_table_free:
2266 mlx4_cleanup_pd_table(dev);
2267
2268err_kar_unmap:
2269 iounmap(priv->kar);
2270
2271err_uar_free:
2272 mlx4_uar_free(dev, &priv->driver_uar);
2273
2274err_uar_table_free:
2275 mlx4_cleanup_uar_table(dev);
2276 return err;
2277}
2278
e8f9b2ed 2279static void mlx4_enable_msi_x(struct mlx4_dev *dev)
225c7b1f
RD
2280{
2281 struct mlx4_priv *priv = mlx4_priv(dev);
b8dd786f 2282 struct msix_entry *entries;
225c7b1f
RD
2283 int i;
2284
2285 if (msi_x) {
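		/* Ask for one completion vector per port per online CPU
		 * plus the legacy block, clamped to the EQs the firmware
		 * actually exposes; pci_enable_msix_range() may grant
		 * anything from 2 up to that request.
		 */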
7ae0e400
MB
2286 int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ;
2287
ca4c7b35
OG
2288 nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
2289 nreq);
ab9c17a0 2290
b8dd786f
YP
2291 entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
2292 if (!entries)
2293 goto no_msi;
2294
2295 for (i = 0; i < nreq; ++i)
225c7b1f
RD
2296 entries[i].entry = i;
2297
872bf2fb
YH
2298 nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
2299 nreq);
66e2f9c1
AG
2300
2301 if (nreq < 0) {
5bf0da7d 2302 kfree(entries);
225c7b1f 2303 goto no_msi;
66e2f9c1 2304 } else if (nreq < MSIX_LEGACY_SZ +
1a91de28 2305 dev->caps.num_ports * MIN_MSIX_P_PORT) {
0b7ca5a9
YP
 2306 /* Working in legacy mode, all EQs are shared */
2307 dev->caps.comp_pool = 0;
2308 dev->caps.num_comp_vectors = nreq - 1;
2309 } else {
2310 dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ;
2311 dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
2312 }
b8dd786f 2313 for (i = 0; i < nreq; ++i)
225c7b1f
RD
2314 priv->eq_table.eq[i].irq = entries[i].vector;
2315
2316 dev->flags |= MLX4_FLAG_MSI_X;
b8dd786f
YP
2317
2318 kfree(entries);
225c7b1f
RD
2319 return;
2320 }
2321
2322no_msi:
b8dd786f 2323 dev->caps.num_comp_vectors = 1;
0b7ca5a9 2324 dev->caps.comp_pool = 0;
b8dd786f
YP
2325
2326 for (i = 0; i < 2; ++i)
872bf2fb 2327 priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
225c7b1f
RD
2328}
2329
7ff93f8b 2330static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
2a2336f8
YP
2331{
2332 struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
7ff93f8b 2333 int err = 0;
2a2336f8
YP
2334
2335 info->dev = dev;
2336 info->port = port;
ab9c17a0 2337 if (!mlx4_is_slave(dev)) {
ab9c17a0
JM
2338 mlx4_init_mac_table(dev, &info->mac_table);
2339 mlx4_init_vlan_table(dev, &info->vlan_table);
111c6094 2340 mlx4_init_roce_gid_table(dev, &info->gid_table);
16a10ffd 2341 info->base_qpn = mlx4_get_base_qpn(dev, port);
ab9c17a0 2342 }
7ff93f8b
YP
2343
2344 sprintf(info->dev_name, "mlx4_port%d", port);
2345 info->port_attr.attr.name = info->dev_name;
ab9c17a0
JM
2346 if (mlx4_is_mfunc(dev))
2347 info->port_attr.attr.mode = S_IRUGO;
2348 else {
2349 info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
2350 info->port_attr.store = set_port_type;
2351 }
7ff93f8b 2352 info->port_attr.show = show_port_type;
3691c964 2353 sysfs_attr_init(&info->port_attr.attr);
7ff93f8b 2354
872bf2fb 2355 err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
7ff93f8b
YP
2356 if (err) {
2357 mlx4_err(dev, "Failed to create file for port %d\n", port);
2358 info->port = -1;
2359 }
2360
096335b3
OG
2361 sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
2362 info->port_mtu_attr.attr.name = info->dev_mtu_name;
2363 if (mlx4_is_mfunc(dev))
2364 info->port_mtu_attr.attr.mode = S_IRUGO;
2365 else {
2366 info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
2367 info->port_mtu_attr.store = set_port_ib_mtu;
2368 }
2369 info->port_mtu_attr.show = show_port_ib_mtu;
2370 sysfs_attr_init(&info->port_mtu_attr.attr);
2371
872bf2fb
YH
2372 err = device_create_file(&dev->persist->pdev->dev,
2373 &info->port_mtu_attr);
096335b3
OG
2374 if (err) {
2375 mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
872bf2fb
YH
2376 device_remove_file(&info->dev->persist->pdev->dev,
2377 &info->port_attr);
096335b3
OG
2378 info->port = -1;
2379 }
2380
7ff93f8b
YP
2381 return err;
2382}
2383
2384static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
2385{
2386 if (info->port < 0)
2387 return;
2388
872bf2fb
YH
2389 device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
2390 device_remove_file(&info->dev->persist->pdev->dev,
2391 &info->port_mtu_attr);
2a2336f8
YP
2392}
2393
b12d93d6
YP
2394static int mlx4_init_steering(struct mlx4_dev *dev)
2395{
2396 struct mlx4_priv *priv = mlx4_priv(dev);
2397 int num_entries = dev->caps.num_ports;
2398 int i, j;
2399
2400 priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
2401 if (!priv->steer)
2402 return -ENOMEM;
2403
45b51365 2404 for (i = 0; i < num_entries; i++)
b12d93d6
YP
2405 for (j = 0; j < MLX4_NUM_STEERS; j++) {
2406 INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
2407 INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
2408 }
b12d93d6
YP
2409 return 0;
2410}
2411
2412static void mlx4_clear_steering(struct mlx4_dev *dev)
2413{
2414 struct mlx4_priv *priv = mlx4_priv(dev);
2415 struct mlx4_steer_index *entry, *tmp_entry;
2416 struct mlx4_promisc_qp *pqp, *tmp_pqp;
2417 int num_entries = dev->caps.num_ports;
2418 int i, j;
2419
2420 for (i = 0; i < num_entries; i++) {
2421 for (j = 0; j < MLX4_NUM_STEERS; j++) {
2422 list_for_each_entry_safe(pqp, tmp_pqp,
2423 &priv->steer[i].promisc_qps[j],
2424 list) {
2425 list_del(&pqp->list);
2426 kfree(pqp);
2427 }
2428 list_for_each_entry_safe(entry, tmp_entry,
2429 &priv->steer[i].steer_entries[j],
2430 list) {
2431 list_del(&entry->list);
2432 list_for_each_entry_safe(pqp, tmp_pqp,
2433 &entry->duplicates,
2434 list) {
2435 list_del(&pqp->list);
2436 kfree(pqp);
2437 }
2438 kfree(entry);
2439 }
2440 }
2441 }
2442 kfree(priv->steer);
2443}
2444
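/* Flatten the PCI slot/function pair into a single index (eight
 * functions per slot), used below to rank a VF among its siblings.
 */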
ab9c17a0
JM
2445static int extended_func_num(struct pci_dev *pdev)
2446{
2447 return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
2448}
2449
2450#define MLX4_OWNER_BASE 0x8069c
2451#define MLX4_OWNER_SIZE 4
2452
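/* A word at offset 0x8069c of BAR 0 acts as the device-ownership
 * semaphore: a read returning zero means we have claimed the HCA
 * (the usual read-to-acquire convention, as far as this driver shows),
 * while writing zero releases it again.
 */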
2453static int mlx4_get_ownership(struct mlx4_dev *dev)
2454{
2455 void __iomem *owner;
2456 u32 ret;
2457
872bf2fb 2458 if (pci_channel_offline(dev->persist->pdev))
57dbf29a
KSS
2459 return -EIO;
2460
872bf2fb
YH
2461 owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
2462 MLX4_OWNER_BASE,
ab9c17a0
JM
2463 MLX4_OWNER_SIZE);
2464 if (!owner) {
2465 mlx4_err(dev, "Failed to obtain ownership bit\n");
2466 return -ENOMEM;
2467 }
2468
2469 ret = readl(owner);
2470 iounmap(owner);
2471 return (int) !!ret;
2472}
2473
2474static void mlx4_free_ownership(struct mlx4_dev *dev)
2475{
2476 void __iomem *owner;
2477
872bf2fb 2478 if (pci_channel_offline(dev->persist->pdev))
57dbf29a
KSS
2479 return;
2480
872bf2fb
YH
2481 owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
2482 MLX4_OWNER_BASE,
ab9c17a0
JM
2483 MLX4_OWNER_SIZE);
2484 if (!owner) {
2485 mlx4_err(dev, "Failed to obtain ownership bit\n");
2486 return;
2487 }
2488 writel(0, owner);
2489 msleep(1000);
2490 iounmap(owner);
2491}
2492
a0eacca9
MB
2493#define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\
2494 !!((flags) & MLX4_FLAG_MASTER))
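/* SR-IOV and master mode must be set or cleared together; any mixed
 * combination of the two flags marks a half-finished transition and
 * is treated as fatal by the callers below.
 */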
2495
2496static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
2497 u8 total_vfs, int existing_vfs)
2498{
2499 u64 dev_flags = dev->flags;
da315679 2500 int err = 0;
a0eacca9 2501
da315679
MB
2502 atomic_inc(&pf_loading);
2503 if (dev->flags & MLX4_FLAG_SRIOV) {
2504 if (existing_vfs != total_vfs) {
2505 mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
2506 existing_vfs, total_vfs);
2507 total_vfs = existing_vfs;
2508 }
2509 }
2510
2511 dev->dev_vfs = kzalloc(total_vfs * sizeof(*dev->dev_vfs), GFP_KERNEL);
a0eacca9
MB
2512 if (NULL == dev->dev_vfs) {
2513 mlx4_err(dev, "Failed to allocate memory for VFs\n");
2514 goto disable_sriov;
da315679
MB
2515 }
2516
2517 if (!(dev->flags & MLX4_FLAG_SRIOV)) {
2518 mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
2519 err = pci_enable_sriov(pdev, total_vfs);
2520 }
2521 if (err) {
2522 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
2523 err);
2524 goto disable_sriov;
2525 } else {
2526 mlx4_warn(dev, "Running in master mode\n");
2527 dev_flags |= MLX4_FLAG_SRIOV |
2528 MLX4_FLAG_MASTER;
2529 dev_flags &= ~MLX4_FLAG_SLAVE;
872bf2fb 2530 dev->persist->num_vfs = total_vfs;
a0eacca9
MB
2531 }
2532 return dev_flags;
2533
2534disable_sriov:
da315679 2535 atomic_dec(&pf_loading);
872bf2fb 2536 dev->persist->num_vfs = 0;
a0eacca9
MB
2537 kfree(dev->dev_vfs);
2538 return dev_flags & ~MLX4_FLAG_MASTER;
2539}
2540
de966c59
MB
2541enum {
2542 MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
2543};
2544
2545static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
2546 int *nvfs)
2547{
2548 int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2];
2549 /* Checking for 64 VFs as a limitation of CX2 */
2550 if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) &&
2551 requested_vfs >= 64) {
2552 mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
2553 requested_vfs);
2554 return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
2555 }
2556 return 0;
2557}
2558
e1c00e10
MD
2559static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
2560 int total_vfs, int *nvfs, struct mlx4_priv *priv)
225c7b1f 2561{
225c7b1f 2562 struct mlx4_dev *dev;
e1c00e10 2563 unsigned sum = 0;
225c7b1f 2564 int err;
2a2336f8 2565 int port;
e1c00e10 2566 int i;
7ae0e400 2567 struct mlx4_dev_cap *dev_cap = NULL;
bbb07af4 2568 int existing_vfs = 0;
225c7b1f 2569
e1c00e10 2570 dev = &priv->dev;
225c7b1f 2571
b581401e
RD
2572 INIT_LIST_HEAD(&priv->ctx_list);
2573 spin_lock_init(&priv->ctx_lock);
225c7b1f 2574
7ff93f8b
YP
2575 mutex_init(&priv->port_mutex);
2576
6296883c
YP
2577 INIT_LIST_HEAD(&priv->pgdir_list);
2578 mutex_init(&priv->pgdir_mutex);
2579
c1b43dca
EC
2580 INIT_LIST_HEAD(&priv->bf_list);
2581 mutex_init(&priv->bf_mutex);
2582
aca7a3ac 2583 dev->rev_id = pdev->revision;
6e7136ed 2584 dev->numa_node = dev_to_node(&pdev->dev);
e1c00e10 2585
ab9c17a0 2586 /* Detect if this device is a virtual function */
839f1243 2587 if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
ab9c17a0
JM
2588 mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
2589 dev->flags |= MLX4_FLAG_SLAVE;
2590 } else {
2591 /* We reset the device and enable SRIOV only for physical
2592 * devices. Try to claim ownership on the device;
2593 * if already taken, skip -- do not allow multiple PFs */
2594 err = mlx4_get_ownership(dev);
2595 if (err) {
2596 if (err < 0)
e1c00e10 2597 return err;
ab9c17a0 2598 else {
1a91de28 2599 mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
e1c00e10 2600 return -EINVAL;
ab9c17a0
JM
2601 }
2602 }
aca7a3ac 2603
fe6f700d
YP
2604 atomic_set(&priv->opreq_count, 0);
2605 INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
2606
ab9c17a0
JM
2607 /*
2608 * Now reset the HCA before we touch the PCI capabilities or
2609 * attempt a firmware command, since a boot ROM may have left
2610 * the HCA in an undefined state.
2611 */
2612 err = mlx4_reset(dev);
2613 if (err) {
1a91de28 2614 mlx4_err(dev, "Failed to reset HCA, aborting\n");
e1c00e10 2615 goto err_sriov;
ab9c17a0 2616 }
7ae0e400
MB
2617
2618 if (total_vfs) {
7ae0e400 2619 dev->flags = MLX4_FLAG_MASTER;
da315679
MB
2620 existing_vfs = pci_num_vf(pdev);
2621 if (existing_vfs)
2622 dev->flags |= MLX4_FLAG_SRIOV;
872bf2fb 2623 dev->persist->num_vfs = total_vfs;
7ae0e400 2624 }
225c7b1f
RD
2625 }
2626
ab9c17a0 2627slave_start:
521130d1
EE
2628 err = mlx4_cmd_init(dev);
2629 if (err) {
1a91de28 2630 mlx4_err(dev, "Failed to init command interface, aborting\n");
ab9c17a0
JM
2631 goto err_sriov;
2632 }
2633
2634 /* In slave functions, the communication channel must be initialized
2635 * before posting commands. Also, init num_slaves before calling
2636 * mlx4_init_hca */
2637 if (mlx4_is_mfunc(dev)) {
7ae0e400 2638 if (mlx4_is_master(dev)) {
ab9c17a0 2639 dev->num_slaves = MLX4_MAX_NUM_SLAVES;
7ae0e400
MB
2640
2641 } else {
ab9c17a0 2642 dev->num_slaves = 0;
f356fcbe
JM
2643 err = mlx4_multi_func_init(dev);
2644 if (err) {
1a91de28 2645 mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
ab9c17a0
JM
2646 goto err_cmd;
2647 }
2648 }
225c7b1f
RD
2649 }
2650
a0eacca9
MB
2651 err = mlx4_init_fw(dev);
2652 if (err) {
2653 mlx4_err(dev, "Failed to init fw, aborting.\n");
2654 goto err_mfunc;
2655 }
2656
7ae0e400 2657 if (mlx4_is_master(dev)) {
 da315679 2658 /* When we hit the goto slave_start below, dev_cap is already initialized */
7ae0e400
MB
2659 if (!dev_cap) {
2660 dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
2661
2662 if (!dev_cap) {
2663 err = -ENOMEM;
2664 goto err_fw;
2665 }
2666
2667 err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
2668 if (err) {
2669 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
2670 goto err_fw;
2671 }
2672
de966c59
MB
2673 if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
2674 goto err_fw;
2675
7ae0e400
MB
2676 if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
2677 u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
2678 existing_vfs);
2679
2680 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
2681 dev->flags = dev_flags;
2682 if (!SRIOV_VALID_STATE(dev->flags)) {
2683 mlx4_err(dev, "Invalid SRIOV state\n");
2684 goto err_sriov;
2685 }
2686 err = mlx4_reset(dev);
2687 if (err) {
2688 mlx4_err(dev, "Failed to reset HCA, aborting.\n");
2689 goto err_sriov;
2690 }
2691 goto slave_start;
2692 }
2693 } else {
2694 /* Legacy mode FW requires SRIOV to be enabled before
2695 * doing QUERY_DEV_CAP, since max_eq's value is different if
2696 * SRIOV is enabled.
2697 */
2698 memset(dev_cap, 0, sizeof(*dev_cap));
2699 err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
2700 if (err) {
2701 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
2702 goto err_fw;
2703 }
de966c59
MB
2704
2705 if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
2706 goto err_fw;
7ae0e400
MB
2707 }
2708 }
2709
225c7b1f 2710 err = mlx4_init_hca(dev);
ab9c17a0
JM
2711 if (err) {
2712 if (err == -EACCES) {
2713 /* Not primary Physical function
2714 * Running in slave mode */
ffc39f6d 2715 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
a0eacca9
MB
2716 /* We're not a PF */
2717 if (dev->flags & MLX4_FLAG_SRIOV) {
2718 if (!existing_vfs)
2719 pci_disable_sriov(pdev);
2720 if (mlx4_is_master(dev))
2721 atomic_dec(&pf_loading);
2722 dev->flags &= ~MLX4_FLAG_SRIOV;
2723 }
2724 if (!mlx4_is_slave(dev))
2725 mlx4_free_ownership(dev);
ab9c17a0
JM
2726 dev->flags |= MLX4_FLAG_SLAVE;
2727 dev->flags &= ~MLX4_FLAG_MASTER;
2728 goto slave_start;
2729 } else
a0eacca9 2730 goto err_fw;
ab9c17a0
JM
2731 }
2732
7ae0e400
MB
2733 if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
2734 u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs);
2735
2736 if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
2737 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
2738 dev->flags = dev_flags;
2739 err = mlx4_cmd_init(dev);
2740 if (err) {
2741 /* Only VHCR is cleaned up, so could still
2742 * send FW commands
2743 */
2744 mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
2745 goto err_close;
2746 }
2747 } else {
2748 dev->flags = dev_flags;
2749 }
2750
2751 if (!SRIOV_VALID_STATE(dev->flags)) {
2752 mlx4_err(dev, "Invalid SRIOV state\n");
2753 goto err_close;
2754 }
2755 }
2756
b912b2f8
EP
 2757 /* Check if the device is functioning at its maximum possible speed.
 2758 * No return code for this call; just warn the user if the PCI
 2759 * Express device capabilities are under-satisfied by the bus.
2760 */
83d3459a
EP
2761 if (!mlx4_is_slave(dev))
2762 mlx4_check_pcie_caps(dev);
b912b2f8 2763
ab9c17a0
JM
2764 /* In master functions, the communication channel must be initialized
2765 * after obtaining its address from fw */
2766 if (mlx4_is_master(dev)) {
e1c00e10
MD
2767 int ib_ports = 0;
2768
2769 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2770 ib_ports++;
2771
2772 if (ib_ports &&
2773 (num_vfs_argc > 1 || probe_vfs_argc > 1)) {
2774 mlx4_err(dev,
2775 "Invalid syntax of num_vfs/probe_vfs with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n");
2776 err = -EINVAL;
2777 goto err_close;
2778 }
2779 if (dev->caps.num_ports < 2 &&
2780 num_vfs_argc > 1) {
2781 err = -EINVAL;
2782 mlx4_err(dev,
2783 "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
2784 dev->caps.num_ports);
ab9c17a0
JM
2785 goto err_close;
2786 }
872bf2fb 2787 memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
dd41cc3b 2788
872bf2fb
YH
2789 for (i = 0;
2790 i < sizeof(dev->persist->nvfs)/
2791 sizeof(dev->persist->nvfs[0]); i++) {
e1c00e10
MD
2792 unsigned j;
2793
872bf2fb 2794 for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
e1c00e10
MD
2795 dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
2796 dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
2797 dev->caps.num_ports;
1ab95d37
MB
2798 }
2799 }
e1c00e10
MD
2800
2801 /* In master functions, the communication channel
2802 * must be initialized after obtaining its address from fw
2803 */
2804 err = mlx4_multi_func_init(dev);
2805 if (err) {
2806 mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
2807 goto err_close;
2808 }
ab9c17a0 2809 }
225c7b1f 2810
b8dd786f
YP
2811 err = mlx4_alloc_eq_table(dev);
2812 if (err)
ab9c17a0 2813 goto err_master_mfunc;
b8dd786f 2814
0b7ca5a9 2815 priv->msix_ctl.pool_bm = 0;
730c41d5 2816 mutex_init(&priv->msix_ctl.pool_lock);
0b7ca5a9 2817
08fb1055 2818 mlx4_enable_msi_x(dev);
ab9c17a0
JM
2819 if ((mlx4_is_mfunc(dev)) &&
2820 !(dev->flags & MLX4_FLAG_MSI_X)) {
f356fcbe 2821 err = -ENOSYS;
1a91de28 2822 mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
b12d93d6 2823 goto err_free_eq;
ab9c17a0
JM
2824 }
2825
2826 if (!mlx4_is_slave(dev)) {
2827 err = mlx4_init_steering(dev);
2828 if (err)
e1c00e10 2829 goto err_disable_msix;
ab9c17a0 2830 }
b12d93d6 2831
225c7b1f 2832 err = mlx4_setup_hca(dev);
ab9c17a0
JM
2833 if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
2834 !mlx4_is_mfunc(dev)) {
08fb1055 2835 dev->flags &= ~MLX4_FLAG_MSI_X;
9858d2d1
YP
2836 dev->caps.num_comp_vectors = 1;
2837 dev->caps.comp_pool = 0;
08fb1055
MT
2838 pci_disable_msix(pdev);
2839 err = mlx4_setup_hca(dev);
2840 }
2841
225c7b1f 2842 if (err)
b12d93d6 2843 goto err_steer;
225c7b1f 2844
5a0d0a61
JM
2845 mlx4_init_quotas(dev);
2846
7ff93f8b
YP
2847 for (port = 1; port <= dev->caps.num_ports; port++) {
2848 err = mlx4_init_port_info(dev, port);
2849 if (err)
2850 goto err_port;
2851 }
2a2336f8 2852
225c7b1f
RD
2853 err = mlx4_register_device(dev);
2854 if (err)
7ff93f8b 2855 goto err_port;
225c7b1f 2856
b046ffe5
EP
2857 mlx4_request_modules(dev);
2858
27bf91d6
YP
2859 mlx4_sense_init(dev);
2860 mlx4_start_sense(dev);
2861
befdf897 2862 priv->removed = 0;
225c7b1f 2863
872bf2fb 2864 if (mlx4_is_master(dev) && dev->persist->num_vfs)
e1a5ddc5
AV
2865 atomic_dec(&pf_loading);
2866
da315679 2867 kfree(dev_cap);
225c7b1f
RD
2868 return 0;
2869
7ff93f8b 2870err_port:
b4f77264 2871 for (--port; port >= 1; --port)
7ff93f8b
YP
2872 mlx4_cleanup_port_info(&priv->port[port]);
2873
f2a3f6a3 2874 mlx4_cleanup_counters_table(dev);
225c7b1f
RD
2875 mlx4_cleanup_qp_table(dev);
2876 mlx4_cleanup_srq_table(dev);
2877 mlx4_cleanup_cq_table(dev);
2878 mlx4_cmd_use_polling(dev);
2879 mlx4_cleanup_eq_table(dev);
fe6f700d 2880 mlx4_cleanup_mcg_table(dev);
225c7b1f 2881 mlx4_cleanup_mr_table(dev);
012a8ff5 2882 mlx4_cleanup_xrcd_table(dev);
225c7b1f
RD
2883 mlx4_cleanup_pd_table(dev);
2884 mlx4_cleanup_uar_table(dev);
2885
b12d93d6 2886err_steer:
ab9c17a0
JM
2887 if (!mlx4_is_slave(dev))
2888 mlx4_clear_steering(dev);
b12d93d6 2889
e1c00e10
MD
2890err_disable_msix:
2891 if (dev->flags & MLX4_FLAG_MSI_X)
2892 pci_disable_msix(pdev);
2893
b8dd786f
YP
2894err_free_eq:
2895 mlx4_free_eq_table(dev);
2896
ab9c17a0
JM
2897err_master_mfunc:
2898 if (mlx4_is_master(dev))
2899 mlx4_multi_func_cleanup(dev);
2900
b38f2879 2901 if (mlx4_is_slave(dev)) {
99ec41d0 2902 kfree(dev->caps.qp0_qkey);
b38f2879
DB
2903 kfree(dev->caps.qp0_tunnel);
2904 kfree(dev->caps.qp0_proxy);
2905 kfree(dev->caps.qp1_tunnel);
2906 kfree(dev->caps.qp1_proxy);
2907 }
2908
225c7b1f
RD
2909err_close:
2910 mlx4_close_hca(dev);
2911
a0eacca9
MB
2912err_fw:
2913 mlx4_close_fw(dev);
2914
ab9c17a0
JM
2915err_mfunc:
2916 if (mlx4_is_slave(dev))
2917 mlx4_multi_func_cleanup(dev);
2918
225c7b1f 2919err_cmd:
ffc39f6d 2920 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
225c7b1f 2921
ab9c17a0 2922err_sriov:
bbb07af4 2923 if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs)
ab9c17a0
JM
2924 pci_disable_sriov(pdev);
2925
872bf2fb 2926 if (mlx4_is_master(dev) && dev->persist->num_vfs)
e1a5ddc5
AV
2927 atomic_dec(&pf_loading);
2928
1ab95d37
MB
2929 kfree(priv->dev.dev_vfs);
2930
e1c00e10
MD
2931 if (!mlx4_is_slave(dev))
2932 mlx4_free_ownership(dev);
2933
7ae0e400 2934 kfree(dev_cap);
e1c00e10
MD
2935 return err;
2936}
2937
2938static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
2939 struct mlx4_priv *priv)
2940{
2941 int err;
2942 int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
2943 int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
2944 const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
2945 {2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
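	/* param_map[argc - 1][i] maps the i-th positional module-param
	 * value to its nvfs/prb_vf slot: a single value counts VFs on
	 * both ports together (slot 2); two or three values fill the
	 * port1, port2 and dual-port slots in order.
	 */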
2946 unsigned total_vfs = 0;
2947 unsigned int i;
2948
2949 pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
2950
2951 err = pci_enable_device(pdev);
2952 if (err) {
2953 dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
2954 return err;
2955 }
2956
 2957 /* Due to the requirement that all VFs and the PF are *guaranteed* 2 MACs
 2958 * per port, we must limit the number of VFs to 63 (since there are
 2959 * 128 MACs)
2960 */
2961 for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
2962 total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
2963 nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
2964 if (nvfs[i] < 0) {
2965 dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
2966 err = -EINVAL;
2967 goto err_disable_pdev;
2968 }
2969 }
2970 for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
2971 i++) {
2972 prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
2973 if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
2974 dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
2975 err = -EINVAL;
2976 goto err_disable_pdev;
2977 }
2978 }
2979 if (total_vfs >= MLX4_MAX_NUM_VF) {
2980 dev_err(&pdev->dev,
2981 "Requested more VF's (%d) than allowed (%d)\n",
2982 total_vfs, MLX4_MAX_NUM_VF - 1);
2983 err = -EINVAL;
2984 goto err_disable_pdev;
2985 }
2986
2987 for (i = 0; i < MLX4_MAX_PORTS; i++) {
2988 if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
2989 dev_err(&pdev->dev,
2990 "Requested more VF's (%d) for port (%d) than allowed (%d)\n",
2991 nvfs[i] + nvfs[2], i + 1,
2992 MLX4_MAX_NUM_VF_P_PORT - 1);
2993 err = -EINVAL;
2994 goto err_disable_pdev;
2995 }
2996 }
2997
2998 /* Check for BARs. */
2999 if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
3000 !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
3001 dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
3002 pci_dev_data, pci_resource_flags(pdev, 0));
3003 err = -ENODEV;
3004 goto err_disable_pdev;
3005 }
3006 if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
3007 dev_err(&pdev->dev, "Missing UAR, aborting\n");
3008 err = -ENODEV;
3009 goto err_disable_pdev;
3010 }
3011
3012 err = pci_request_regions(pdev, DRV_NAME);
3013 if (err) {
3014 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
3015 goto err_disable_pdev;
3016 }
3017
3018 pci_set_master(pdev);
3019
3020 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3021 if (err) {
3022 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
3023 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3024 if (err) {
3025 dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
3026 goto err_release_regions;
3027 }
3028 }
3029 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3030 if (err) {
3031 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
3032 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3033 if (err) {
3034 dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
3035 goto err_release_regions;
3036 }
3037 }
3038
3039 /* Allow large DMA segments, up to the firmware limit of 1 GB */
3040 dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
3041 /* Detect if this device is a virtual function */
3042 if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
 3043 /* When acting as PF, we normally skip VFs unless explicitly
 3044 * requested to probe them.
3045 */
3046 if (total_vfs) {
3047 unsigned vfs_offset = 0;
3048
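			/* Find which nvfs bucket this VF's extended
			 * function number falls into, then probe it only
			 * if it is within the first prb_vf[i] VFs of that
			 * bucket.
			 */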
3049 for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
3050 vfs_offset + nvfs[i] < extended_func_num(pdev);
3051 vfs_offset += nvfs[i], i++)
3052 ;
3053 if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
3054 err = -ENODEV;
3055 goto err_release_regions;
3056 }
3057 if ((extended_func_num(pdev) - vfs_offset)
3058 > prb_vf[i]) {
 3059 dev_warn(&pdev->dev, "Skipping virtual function: %d\n",
3060 extended_func_num(pdev));
3061 err = -ENODEV;
3062 goto err_release_regions;
3063 }
3064 }
3065 }
3066
3067 err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
3068 if (err)
3069 goto err_release_regions;
3070 return 0;
225c7b1f 3071
a01df0fe
RD
3072err_release_regions:
3073 pci_release_regions(pdev);
225c7b1f
RD
3074
3075err_disable_pdev:
3076 pci_disable_device(pdev);
3077 pci_set_drvdata(pdev, NULL);
3078 return err;
3079}
3080
1dd06ae8 3081static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
3d73c288 3082{
befdf897
WY
3083 struct mlx4_priv *priv;
3084 struct mlx4_dev *dev;
e1c00e10 3085 int ret;
befdf897 3086
0a645e80 3087 printk_once(KERN_INFO "%s", mlx4_version);
3d73c288 3088
befdf897
WY
3089 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
3090 if (!priv)
3091 return -ENOMEM;
3092
3093 dev = &priv->dev;
872bf2fb
YH
3094 dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
3095 if (!dev->persist) {
3096 kfree(priv);
3097 return -ENOMEM;
3098 }
3099 dev->persist->pdev = pdev;
3100 dev->persist->dev = dev;
3101 pci_set_drvdata(pdev, dev->persist);
befdf897
WY
3102 priv->pci_dev_data = id->driver_data;
3103
e1c00e10 3104 ret = __mlx4_init_one(pdev, id->driver_data, priv);
872bf2fb
YH
3105 if (ret) {
3106 kfree(dev->persist);
e1c00e10 3107 kfree(priv);
872bf2fb 3108 }
e1c00e10 3109 return ret;
3d73c288
RD
3110}
3111
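/* Wipe the driver-private state between loads while keeping the
 * persistent part, which is meant to survive unload/reload and reset
 * cycles.
 */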
dd0eefe3
YH
3112static void mlx4_clean_dev(struct mlx4_dev *dev)
3113{
3114 struct mlx4_dev_persistent *persist = dev->persist;
3115 struct mlx4_priv *priv = mlx4_priv(dev);
3116
3117 memset(priv, 0, sizeof(*priv));
3118 priv->dev.persist = persist;
3119}
3120
e1c00e10 3121static void mlx4_unload_one(struct pci_dev *pdev)
225c7b1f 3122{
872bf2fb
YH
3123 struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3124 struct mlx4_dev *dev = persist->dev;
225c7b1f 3125 struct mlx4_priv *priv = mlx4_priv(dev);
befdf897 3126 int pci_dev_data;
dd0eefe3 3127 int p, i;
bbb07af4 3128 int active_vfs = 0;
225c7b1f 3129
befdf897
WY
3130 if (priv->removed)
3131 return;
225c7b1f 3132
dd0eefe3
YH
 3133 /* Save the current port types for a later restore */
3134 for (i = 0; i < dev->caps.num_ports; i++) {
3135 dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
3136 dev->persist->curr_port_poss_type[i] = dev->caps.
3137 possible_type[i + 1];
3138 }
3139
befdf897 3140 pci_dev_data = priv->pci_dev_data;
225c7b1f 3141
bbb07af4
JM
 3142 /* Disabling SR-IOV is not allowed while there are active VFs */
3143 if (mlx4_is_master(dev)) {
3144 active_vfs = mlx4_how_many_lives_vf(dev);
3145 if (active_vfs) {
 3146 pr_warn("Removing PF while there are active VFs!\n");
3147 pr_warn("Will not disable SR-IOV.\n");
3148 }
3149 }
befdf897
WY
3150 mlx4_stop_sense(dev);
3151 mlx4_unregister_device(dev);
225c7b1f 3152
befdf897
WY
3153 for (p = 1; p <= dev->caps.num_ports; p++) {
3154 mlx4_cleanup_port_info(&priv->port[p]);
3155 mlx4_CLOSE_PORT(dev, p);
3156 }
3157
3158 if (mlx4_is_master(dev))
3159 mlx4_free_resource_tracker(dev,
3160 RES_TR_FREE_SLAVES_ONLY);
3161
3162 mlx4_cleanup_counters_table(dev);
3163 mlx4_cleanup_qp_table(dev);
3164 mlx4_cleanup_srq_table(dev);
3165 mlx4_cleanup_cq_table(dev);
3166 mlx4_cmd_use_polling(dev);
3167 mlx4_cleanup_eq_table(dev);
3168 mlx4_cleanup_mcg_table(dev);
3169 mlx4_cleanup_mr_table(dev);
3170 mlx4_cleanup_xrcd_table(dev);
3171 mlx4_cleanup_pd_table(dev);
225c7b1f 3172
befdf897
WY
3173 if (mlx4_is_master(dev))
3174 mlx4_free_resource_tracker(dev,
3175 RES_TR_FREE_STRUCTS_ONLY);
47605df9 3176
befdf897
WY
3177 iounmap(priv->kar);
3178 mlx4_uar_free(dev, &priv->driver_uar);
3179 mlx4_cleanup_uar_table(dev);
3180 if (!mlx4_is_slave(dev))
3181 mlx4_clear_steering(dev);
3182 mlx4_free_eq_table(dev);
3183 if (mlx4_is_master(dev))
3184 mlx4_multi_func_cleanup(dev);
3185 mlx4_close_hca(dev);
a0eacca9 3186 mlx4_close_fw(dev);
befdf897
WY
3187 if (mlx4_is_slave(dev))
3188 mlx4_multi_func_cleanup(dev);
ffc39f6d 3189 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
47605df9 3190
befdf897
WY
3191 if (dev->flags & MLX4_FLAG_MSI_X)
3192 pci_disable_msix(pdev);
bbb07af4 3193 if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
befdf897
WY
3194 mlx4_warn(dev, "Disabling SR-IOV\n");
3195 pci_disable_sriov(pdev);
a0eacca9 3196 dev->flags &= ~MLX4_FLAG_SRIOV;
872bf2fb 3197 dev->persist->num_vfs = 0;
225c7b1f 3198 }
befdf897
WY
3199
3200 if (!mlx4_is_slave(dev))
3201 mlx4_free_ownership(dev);
3202
99ec41d0 3203 kfree(dev->caps.qp0_qkey);
befdf897
WY
3204 kfree(dev->caps.qp0_tunnel);
3205 kfree(dev->caps.qp0_proxy);
3206 kfree(dev->caps.qp1_tunnel);
3207 kfree(dev->caps.qp1_proxy);
3208 kfree(dev->dev_vfs);
3209
dd0eefe3 3210 mlx4_clean_dev(dev);
befdf897
WY
3211 priv->pci_dev_data = pci_dev_data;
3212 priv->removed = 1;
3213}
3214
3215static void mlx4_remove_one(struct pci_dev *pdev)
3216{
872bf2fb
YH
3217 struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3218 struct mlx4_dev *dev = persist->dev;
befdf897
WY
3219 struct mlx4_priv *priv = mlx4_priv(dev);
3220
e1c00e10
MD
3221 mlx4_unload_one(pdev);
3222 pci_release_regions(pdev);
3223 pci_disable_device(pdev);
872bf2fb 3224 kfree(dev->persist);
befdf897
WY
3225 kfree(priv);
3226 pci_set_drvdata(pdev, NULL);
225c7b1f
RD
3227}
3228
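/* Re-apply the port types saved by mlx4_unload_one() so that a restart
 * keeps whatever IB/Ethernet configuration was in effect before.
 */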
dd0eefe3
YH
3229static int restore_current_port_types(struct mlx4_dev *dev,
3230 enum mlx4_port_type *types,
3231 enum mlx4_port_type *poss_types)
3232{
3233 struct mlx4_priv *priv = mlx4_priv(dev);
3234 int err, i;
3235
3236 mlx4_stop_sense(dev);
3237
3238 mutex_lock(&priv->port_mutex);
3239 for (i = 0; i < dev->caps.num_ports; i++)
3240 dev->caps.possible_type[i + 1] = poss_types[i];
3241 err = mlx4_change_port_types(dev, types);
3242 mlx4_start_sense(dev);
3243 mutex_unlock(&priv->port_mutex);
3244
3245 return err;
3246}
3247
ee49bd93
JM
3248int mlx4_restart_one(struct pci_dev *pdev)
3249{
872bf2fb
YH
3250 struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3251 struct mlx4_dev *dev = persist->dev;
839f1243 3252 struct mlx4_priv *priv = mlx4_priv(dev);
e1c00e10
MD
3253 int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3254 int pci_dev_data, err, total_vfs;
839f1243
RD
3255
3256 pci_dev_data = priv->pci_dev_data;
872bf2fb
YH
3257 total_vfs = dev->persist->num_vfs;
3258 memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
e1c00e10
MD
3259
3260 mlx4_unload_one(pdev);
3261 err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
3262 if (err) {
3263 mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
3264 __func__, pci_name(pdev), err);
3265 return err;
3266 }
3267
dd0eefe3
YH
3268 err = restore_current_port_types(dev, dev->persist->curr_port_type,
3269 dev->persist->curr_port_poss_type);
3270 if (err)
3271 mlx4_err(dev, "could not restore original port types (%d)\n",
3272 err);
3273
e1c00e10 3274 return err;
ee49bd93
JM
3275}
3276
9baa3c34 3277static const struct pci_device_id mlx4_pci_table[] = {
ab9c17a0 3278 /* MT25408 "Hermon" SDR */
ca3e57a5 3279 { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3280 /* MT25408 "Hermon" DDR */
ca3e57a5 3281 { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3282 /* MT25408 "Hermon" QDR */
ca3e57a5 3283 { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3284 /* MT25408 "Hermon" DDR PCIe gen2 */
ca3e57a5 3285 { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3286 /* MT25408 "Hermon" QDR PCIe gen2 */
ca3e57a5 3287 { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3288 /* MT25408 "Hermon" EN 10GigE */
ca3e57a5 3289 { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3290 /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
ca3e57a5 3291 { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3292 /* MT25458 ConnectX EN 10GBASE-T 10GigE */
ca3e57a5 3293 { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3294 /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
ca3e57a5 3295 { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3296 /* MT26468 ConnectX EN 10GigE PCIe gen2*/
ca3e57a5 3297 { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3298 /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
ca3e57a5 3299 { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3300 /* MT26478 ConnectX2 40GigE PCIe gen2 */
ca3e57a5 3301 { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
ab9c17a0 3302 /* MT25400 Family [ConnectX-2 Virtual Function] */
839f1243 3303 { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
ab9c17a0
JM
3304 /* MT27500 Family [ConnectX-3] */
3305 { PCI_VDEVICE(MELLANOX, 0x1003), 0 },
3306 /* MT27500 Family [ConnectX-3 Virtual Function] */
839f1243 3307 { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
ab9c17a0
JM
3308 { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
3309 { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
3310 { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
3311 { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
3312 { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
3313 { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
3314 { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
3315 { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
3316 { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
3317 { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
3318 { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
3319 { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
225c7b1f
RD
3320 { 0, }
3321};
3322
3323MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
3324
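/* PCI error recovery: on any detected channel error the device is torn
 * down; slot_reset then rebuilds it from scratch unless the failure was
 * reported as permanent.
 */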
57dbf29a
KSS
3325static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
3326 pci_channel_state_t state)
3327{
e1c00e10 3328 mlx4_unload_one(pdev);
57dbf29a
KSS
3329
3330 return state == pci_channel_io_perm_failure ?
3331 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
3332}
3333
3334static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
3335{
befdf897
WY
3336 struct mlx4_dev *dev = pci_get_drvdata(pdev);
3337 struct mlx4_priv *priv = mlx4_priv(dev);
3338 int ret;
97a5221f 3339
e1c00e10 3340 ret = __mlx4_init_one(pdev, priv->pci_dev_data, priv);
57dbf29a
KSS
3341
3342 return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
3343}
3344
3646f0e5 3345static const struct pci_error_handlers mlx4_err_handler = {
57dbf29a
KSS
3346 .error_detected = mlx4_pci_err_detected,
3347 .slot_reset = mlx4_pci_slot_reset,
3348};
3349
225c7b1f
RD
3350static struct pci_driver mlx4_driver = {
3351 .name = DRV_NAME,
3352 .id_table = mlx4_pci_table,
3353 .probe = mlx4_init_one,
e1c00e10 3354 .shutdown = mlx4_unload_one,
f57e6848 3355 .remove = mlx4_remove_one,
57dbf29a 3356 .err_handler = &mlx4_err_handler,
225c7b1f
RD
3357};
3358
7ff93f8b
YP
3359static int __init mlx4_verify_params(void)
3360{
3361 if ((log_num_mac < 0) || (log_num_mac > 7)) {
c20862c8 3362 pr_warn("mlx4_core: bad num_mac: %d\n", log_num_mac);
7ff93f8b
YP
3363 return -1;
3364 }
3365
cb29688a 3366 if (log_num_vlan != 0)
c20862c8
AV
3367 pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
3368 MLX4_LOG_NUM_VLANS);
7ff93f8b 3369
ecc8fb11
AV
3370 if (use_prio != 0)
3371 pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
7ff93f8b 3372
0498628f 3373 if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
c20862c8
AV
3374 pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
3375 log_mtts_per_seg);
ab6bf42e
EC
3376 return -1;
3377 }
3378
ab9c17a0
JM
3379 /* Check if module param for ports type has legal combination */
3380 if (port_type_array[0] == false && port_type_array[1] == true) {
c20862c8 3381 pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
ab9c17a0
JM
3382 port_type_array[0] = true;
3383 }
3384
7d077cd3
MB
3385 if (mlx4_log_num_mgm_entry_size < -7 ||
3386 (mlx4_log_num_mgm_entry_size > 0 &&
3387 (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
3388 mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) {
3389 pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n",
1a91de28
JP
3390 mlx4_log_num_mgm_entry_size,
3391 MLX4_MIN_MGM_LOG_ENTRY_SIZE,
3392 MLX4_MAX_MGM_LOG_ENTRY_SIZE);
3c439b55
JM
3393 return -1;
3394 }
3395
7ff93f8b
YP
3396 return 0;
3397}
3398
225c7b1f
RD
3399static int __init mlx4_init(void)
3400{
3401 int ret;
3402
7ff93f8b
YP
3403 if (mlx4_verify_params())
3404 return -EINVAL;
3405
27bf91d6
YP
3406 mlx4_catas_init();
3407
3408 mlx4_wq = create_singlethread_workqueue("mlx4");
3409 if (!mlx4_wq)
3410 return -ENOMEM;
ee49bd93 3411
225c7b1f 3412 ret = pci_register_driver(&mlx4_driver);
1b85ee09
WY
3413 if (ret < 0)
3414 destroy_workqueue(mlx4_wq);
225c7b1f
RD
3415 return ret < 0 ? ret : 0;
3416}
3417
3418static void __exit mlx4_cleanup(void)
3419{
3420 pci_unregister_driver(&mlx4_driver);
27bf91d6 3421 destroy_workqueue(mlx4_wq);
225c7b1f
RD
3422}
3423
3424module_init(mlx4_init);
3425module_exit(mlx4_cleanup);