IB/mlx4: Add support for steerable IB UD QPs
drivers/infiniband/hw/mlx4/main.c (linux-2.6-block.git)
1 /*
2  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3  * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33
34 #include <linux/module.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <linux/errno.h>
38 #include <linux/netdevice.h>
39 #include <linux/inetdevice.h>
40 #include <linux/rtnetlink.h>
41 #include <linux/if_vlan.h>
42
43 #include <rdma/ib_smi.h>
44 #include <rdma/ib_user_verbs.h>
45 #include <rdma/ib_addr.h>
46
47 #include <linux/mlx4/driver.h>
48 #include <linux/mlx4/cmd.h>
49
50 #include "mlx4_ib.h"
51 #include "user.h"
52
53 #define DRV_NAME        MLX4_IB_DRV_NAME
54 #define DRV_VERSION     "1.0"
55 #define DRV_RELDATE     "April 4, 2008"
56
57 #define MLX4_IB_FLOW_MAX_PRIO 0xFFF
58 #define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
59
60 MODULE_AUTHOR("Roland Dreier");
61 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
62 MODULE_LICENSE("Dual BSD/GPL");
63 MODULE_VERSION(DRV_VERSION);
64
65 int mlx4_ib_sm_guid_assign = 1;
66 module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
67 MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
68
69 static const char mlx4_ib_version[] =
70         DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
71         DRV_VERSION " (" DRV_RELDATE ")\n";
72
73 struct update_gid_work {
74         struct work_struct      work;
75         union ib_gid            gids[128];
76         struct mlx4_ib_dev     *dev;
77         int                     port;
78 };
79
80 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
81
82 static struct workqueue_struct *wq;
83
84 static void init_query_mad(struct ib_smp *mad)
85 {
86         mad->base_version  = 1;
87         mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
88         mad->class_version = 1;
89         mad->method        = IB_MGMT_METHOD_GET;
90 }
91
92 static union ib_gid zgid;
93
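/*
 * Decide whether device-managed flow steering (DMFS) can be used: it
 * requires MLX4_DEV_CAP_FLAG2_FS_EN for Ethernet ports and
 * MLX4_DEV_CAP_FLAG2_DMFS_IPOIB for IB ports, and is not supported for
 * IB ports in a multifunction (SR-IOV) environment.
 */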
94 static int check_flow_steering_support(struct mlx4_dev *dev)
95 {
96         int eth_num_ports = 0;
97         int ib_num_ports = 0;
98
99         int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
100
101         if (dmfs) {
102                 int i;
103                 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
104                         eth_num_ports++;
105                 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
106                         ib_num_ports++;
107                 dmfs &= (!ib_num_ports ||
108                          (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
109                         (!eth_num_ports ||
110                          (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
111                 if (ib_num_ports && mlx4_is_mfunc(dev)) {
112                         pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
113                         dmfs = 0;
114                 }
115         }
116         return dmfs;
117 }
118
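/*
 * Query device attributes: fetch the NodeInfo attribute from the firmware
 * via a MAD and combine it with the cached device capabilities to fill in
 * struct ib_device_attr.
 */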
119 static int mlx4_ib_query_device(struct ib_device *ibdev,
120                                 struct ib_device_attr *props)
121 {
122         struct mlx4_ib_dev *dev = to_mdev(ibdev);
123         struct ib_smp *in_mad  = NULL;
124         struct ib_smp *out_mad = NULL;
125         int err = -ENOMEM;
126
127         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
128         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
129         if (!in_mad || !out_mad)
130                 goto out;
131
132         init_query_mad(in_mad);
133         in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
134
135         err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
136                            1, NULL, NULL, in_mad, out_mad);
137         if (err)
138                 goto out;
139
140         memset(props, 0, sizeof *props);
141
142         props->fw_ver = dev->dev->caps.fw_ver;
143         props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
144                 IB_DEVICE_PORT_ACTIVE_EVENT             |
145                 IB_DEVICE_SYS_IMAGE_GUID                |
146                 IB_DEVICE_RC_RNR_NAK_GEN                |
147                 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
148         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
149                 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
150         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
151                 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
152         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM)
153                 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
154         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
155                 props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
156         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
157                 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
158         if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
159                 props->device_cap_flags |= IB_DEVICE_UD_TSO;
160         if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
161                 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
162         if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
163             (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
164             (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
165                 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
166         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
167                 props->device_cap_flags |= IB_DEVICE_XRC;
168         if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
169                 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
170         if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
171                 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
172                         props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
173                 else
174                         props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
175         }
176         if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
177                 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
178
179         props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
180                 0xffffff;
181         props->vendor_part_id      = dev->dev->pdev->device;
182         props->hw_ver              = be32_to_cpup((__be32 *) (out_mad->data + 32));
183         memcpy(&props->sys_image_guid, out_mad->data +  4, 8);
184
185         props->max_mr_size         = ~0ull;
186         props->page_size_cap       = dev->dev->caps.page_size_cap;
187         props->max_qp              = dev->dev->quotas.qp;
188         props->max_qp_wr           = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
189         props->max_sge             = min(dev->dev->caps.max_sq_sg,
190                                          dev->dev->caps.max_rq_sg);
191         props->max_cq              = dev->dev->quotas.cq;
192         props->max_cqe             = dev->dev->caps.max_cqes;
193         props->max_mr              = dev->dev->quotas.mpt;
194         props->max_pd              = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
195         props->max_qp_rd_atom      = dev->dev->caps.max_qp_dest_rdma;
196         props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
197         props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
198         props->max_srq             = dev->dev->quotas.srq;
199         props->max_srq_wr          = dev->dev->caps.max_srq_wqes - 1;
200         props->max_srq_sge         = dev->dev->caps.max_srq_sge;
201         props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
202         props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
203         props->atomic_cap          = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
204                 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
205         props->masked_atomic_cap   = props->atomic_cap;
206         props->max_pkeys           = dev->dev->caps.pkey_table_len[1];
207         props->max_mcast_grp       = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
208         props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
209         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
210                                            props->max_mcast_grp;
211         props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
212
213 out:
214         kfree(in_mad);
215         kfree(out_mad);
216
217         return err;
218 }
219
220 static enum rdma_link_layer
221 mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
222 {
223         struct mlx4_dev *dev = to_mdev(device)->dev;
224
225         return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
226                 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
227 }
228
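/*
 * Query an IB port: read the PortInfo attribute via MAD_IFC, decode LID,
 * state, width and speed, then check the extended-speed and FDR-10
 * indications so FDR/EDR links are reported correctly.
 */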
229 static int ib_link_query_port(struct ib_device *ibdev, u8 port,
230                               struct ib_port_attr *props, int netw_view)
231 {
232         struct ib_smp *in_mad  = NULL;
233         struct ib_smp *out_mad = NULL;
234         int ext_active_speed;
235         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
236         int err = -ENOMEM;
237
238         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
239         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
240         if (!in_mad || !out_mad)
241                 goto out;
242
243         init_query_mad(in_mad);
244         in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
245         in_mad->attr_mod = cpu_to_be32(port);
246
247         if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
248                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
249
250         err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
251                                 in_mad, out_mad);
252         if (err)
253                 goto out;
254
255
256         props->lid              = be16_to_cpup((__be16 *) (out_mad->data + 16));
257         props->lmc              = out_mad->data[34] & 0x7;
258         props->sm_lid           = be16_to_cpup((__be16 *) (out_mad->data + 18));
259         props->sm_sl            = out_mad->data[36] & 0xf;
260         props->state            = out_mad->data[32] & 0xf;
261         props->phys_state       = out_mad->data[33] >> 4;
262         props->port_cap_flags   = be32_to_cpup((__be32 *) (out_mad->data + 20));
263         if (netw_view)
264                 props->gid_tbl_len = out_mad->data[50];
265         else
266                 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
267         props->max_msg_sz       = to_mdev(ibdev)->dev->caps.max_msg_sz;
268         props->pkey_tbl_len     = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
269         props->bad_pkey_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 46));
270         props->qkey_viol_cntr   = be16_to_cpup((__be16 *) (out_mad->data + 48));
271         props->active_width     = out_mad->data[31] & 0xf;
272         props->active_speed     = out_mad->data[35] >> 4;
273         props->max_mtu          = out_mad->data[41] & 0xf;
274         props->active_mtu       = out_mad->data[36] >> 4;
275         props->subnet_timeout   = out_mad->data[51] & 0x1f;
276         props->max_vl_num       = out_mad->data[37] >> 4;
277         props->init_type_reply  = out_mad->data[41] >> 4;
278
279         /* Check if extended speeds (EDR/FDR/...) are supported */
280         if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
281                 ext_active_speed = out_mad->data[62] >> 4;
282
283                 switch (ext_active_speed) {
284                 case 1:
285                         props->active_speed = IB_SPEED_FDR;
286                         break;
287                 case 2:
288                         props->active_speed = IB_SPEED_EDR;
289                         break;
290                 }
291         }
292
293         /* If reported active speed is QDR, check if it is FDR-10 */
294         if (props->active_speed == IB_SPEED_QDR) {
295                 init_query_mad(in_mad);
296                 in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
297                 in_mad->attr_mod = cpu_to_be32(port);
298
299                 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
300                                    NULL, NULL, in_mad, out_mad);
301                 if (err)
302                         goto out;
303
304                 /* Checking LinkSpeedActive for FDR-10 */
305                 if (out_mad->data[15] & 0x1)
306                         props->active_speed = IB_SPEED_FDR10;
307         }
308
309         /* Avoid wrong speed value returned by FW if the IB link is down. */
310         if (props->state == IB_PORT_DOWN)
311                  props->active_speed = IB_SPEED_SDR;
312
313 out:
314         kfree(in_mad);
315         kfree(out_mad);
316         return err;
317 }
318
319 static u8 state_to_phys_state(enum ib_port_state state)
320 {
321         return state == IB_PORT_ACTIVE ? 5 : 3;
322 }
323
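/*
 * Query an Ethernet (RoCE) port: issue QUERY_PORT to the firmware for the
 * link width and derive state/MTU from the associated net_device, using
 * fixed IB-like defaults for the remaining fields.
 */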
324 static int eth_link_query_port(struct ib_device *ibdev, u8 port,
325                                struct ib_port_attr *props, int netw_view)
326 {
327
328         struct mlx4_ib_dev *mdev = to_mdev(ibdev);
329         struct mlx4_ib_iboe *iboe = &mdev->iboe;
330         struct net_device *ndev;
331         enum ib_mtu tmp;
332         struct mlx4_cmd_mailbox *mailbox;
333         int err = 0;
334
335         mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
336         if (IS_ERR(mailbox))
337                 return PTR_ERR(mailbox);
338
339         err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
340                            MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
341                            MLX4_CMD_WRAPPED);
342         if (err)
343                 goto out;
344
345         props->active_width     =  (((u8 *)mailbox->buf)[5] == 0x40) ?
346                                                 IB_WIDTH_4X : IB_WIDTH_1X;
347         props->active_speed     = IB_SPEED_QDR;
348         props->port_cap_flags   = IB_PORT_CM_SUP;
349         props->gid_tbl_len      = mdev->dev->caps.gid_table_len[port];
350         props->max_msg_sz       = mdev->dev->caps.max_msg_sz;
351         props->pkey_tbl_len     = 1;
352         props->max_mtu          = IB_MTU_4096;
353         props->max_vl_num       = 2;
354         props->state            = IB_PORT_DOWN;
355         props->phys_state       = state_to_phys_state(props->state);
356         props->active_mtu       = IB_MTU_256;
357         spin_lock(&iboe->lock);
358         ndev = iboe->netdevs[port - 1];
359         if (!ndev)
360                 goto out_unlock;
361
362         tmp = iboe_get_mtu(ndev->mtu);
363         props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
364
365         props->state            = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
366                                         IB_PORT_ACTIVE : IB_PORT_DOWN;
367         props->phys_state       = state_to_phys_state(props->state);
368 out_unlock:
369         spin_unlock(&iboe->lock);
370 out:
371         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
372         return err;
373 }
374
375 int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
376                          struct ib_port_attr *props, int netw_view)
377 {
378         int err;
379
380         memset(props, 0, sizeof *props);
381
382         err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
383                 ib_link_query_port(ibdev, port, props, netw_view) :
384                                 eth_link_query_port(ibdev, port, props, netw_view);
385
386         return err;
387 }
388
389 static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
390                               struct ib_port_attr *props)
391 {
392         /* returns host view */
393         return __mlx4_ib_query_port(ibdev, port, props, 0);
394 }
395
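/*
 * Query a GID on an IB port via MAD: the subnet prefix comes from PortInfo
 * and the GUID from the GuidInfo table.  In a multifunction (SR-IOV) host
 * view only index 0 is exposed; higher indexes return the null GID.
 */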
396 int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
397                         union ib_gid *gid, int netw_view)
398 {
399         struct ib_smp *in_mad  = NULL;
400         struct ib_smp *out_mad = NULL;
401         int err = -ENOMEM;
402         struct mlx4_ib_dev *dev = to_mdev(ibdev);
403         int clear = 0;
404         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
405
406         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
407         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
408         if (!in_mad || !out_mad)
409                 goto out;
410
411         init_query_mad(in_mad);
412         in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
413         in_mad->attr_mod = cpu_to_be32(port);
414
415         if (mlx4_is_mfunc(dev->dev) && netw_view)
416                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
417
418         err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
419         if (err)
420                 goto out;
421
422         memcpy(gid->raw, out_mad->data + 8, 8);
423
424         if (mlx4_is_mfunc(dev->dev) && !netw_view) {
425                 if (index) {
426                         /* For any index > 0, return the null guid */
427                         err = 0;
428                         clear = 1;
429                         goto out;
430                 }
431         }
432
433         init_query_mad(in_mad);
434         in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
435         in_mad->attr_mod = cpu_to_be32(index / 8);
436
437         err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
438                            NULL, NULL, in_mad, out_mad);
439         if (err)
440                 goto out;
441
442         memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
443
444 out:
445         if (clear)
446                 memset(gid->raw + 8, 0, 8);
447         kfree(in_mad);
448         kfree(out_mad);
449         return err;
450 }
451
452 static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
453                           union ib_gid *gid)
454 {
455         struct mlx4_ib_dev *dev = to_mdev(ibdev);
456
457         *gid = dev->iboe.gid_table[port - 1][index];
458
459         return 0;
460 }
461
462 static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
463                              union ib_gid *gid)
464 {
465         if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
466                 return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
467         else
468                 return iboe_query_gid(ibdev, port, index, gid);
469 }
470
471 int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
472                          u16 *pkey, int netw_view)
473 {
474         struct ib_smp *in_mad  = NULL;
475         struct ib_smp *out_mad = NULL;
476         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
477         int err = -ENOMEM;
478
479         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
480         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
481         if (!in_mad || !out_mad)
482                 goto out;
483
484         init_query_mad(in_mad);
485         in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
486         in_mad->attr_mod = cpu_to_be32(index / 32);
487
488         if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
489                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
490
491         err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
492                            in_mad, out_mad);
493         if (err)
494                 goto out;
495
496         *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
497
498 out:
499         kfree(in_mad);
500         kfree(out_mad);
501         return err;
502 }
503
504 static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
505 {
506         return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
507 }
508
509 static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
510                                  struct ib_device_modify *props)
511 {
512         struct mlx4_cmd_mailbox *mailbox;
513         unsigned long flags;
514
515         if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
516                 return -EOPNOTSUPP;
517
518         if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
519                 return 0;
520
521         if (mlx4_is_slave(to_mdev(ibdev)->dev))
522                 return -EOPNOTSUPP;
523
524         spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
525         memcpy(ibdev->node_desc, props->node_desc, 64);
526         spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
527
528         /*
529          * If possible, pass the node desc to FW so it can generate a
530          * trap 144 (node description changed).  If the cmd fails, just ignore it.
531          */
532         mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
533         if (IS_ERR(mailbox))
534                 return 0;
535
536         memcpy(mailbox->buf, props->node_desc, 64);
537         mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
538                  MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
539
540         mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
541
542         return 0;
543 }
544
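/*
 * Helper for modify_port: write the new capability mask (and optionally
 * reset the Q_Key violation counter) through the SET_PORT command,
 * handling both the old and new mailbox layouts.
 */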
545 static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
546                          u32 cap_mask)
547 {
548         struct mlx4_cmd_mailbox *mailbox;
549         int err;
550         u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
551
552         mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
553         if (IS_ERR(mailbox))
554                 return PTR_ERR(mailbox);
555
556         if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
557                 *(u8 *) mailbox->buf         = !!reset_qkey_viols << 6;
558                 ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
559         } else {
560                 ((u8 *) mailbox->buf)[3]     = !!reset_qkey_viols;
561                 ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
562         }
563
564         err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
565                        MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
566
567         mlx4_free_cmd_mailbox(dev->dev, mailbox);
568         return err;
569 }
570
571 static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
572                                struct ib_port_modify *props)
573 {
574         struct ib_port_attr attr;
575         u32 cap_mask;
576         int err;
577
578         mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
579
580         err = mlx4_ib_query_port(ibdev, port, &attr);
581         if (err)
582                 goto out;
583
584         cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
585                 ~props->clr_port_cap_mask;
586
587         err = mlx4_SET_PORT(to_mdev(ibdev), port,
588                             !!(mask & IB_PORT_RESET_QKEY_CNTR),
589                             cap_mask);
590
591 out:
592         mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
593         return err;
594 }
595
596 static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
597                                                   struct ib_udata *udata)
598 {
599         struct mlx4_ib_dev *dev = to_mdev(ibdev);
600         struct mlx4_ib_ucontext *context;
601         struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
602         struct mlx4_ib_alloc_ucontext_resp resp;
603         int err;
604
605         if (!dev->ib_active)
606                 return ERR_PTR(-EAGAIN);
607
608         if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
609                 resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
610                 resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
611                 resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
612         } else {
613                 resp.dev_caps         = dev->dev->caps.userspace_caps;
614                 resp.qp_tab_size      = dev->dev->caps.num_qps;
615                 resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
616                 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
617                 resp.cqe_size         = dev->dev->caps.cqe_size;
618         }
619
620         context = kmalloc(sizeof *context, GFP_KERNEL);
621         if (!context)
622                 return ERR_PTR(-ENOMEM);
623
624         err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
625         if (err) {
626                 kfree(context);
627                 return ERR_PTR(err);
628         }
629
630         INIT_LIST_HEAD(&context->db_page_list);
631         mutex_init(&context->db_page_mutex);
632
633         if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
634                 err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
635         else
636                 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
637
638         if (err) {
639                 mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
640                 kfree(context);
641                 return ERR_PTR(-EFAULT);
642         }
643
644         return &context->ibucontext;
645 }
646
647 static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
648 {
649         struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
650
651         mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
652         kfree(context);
653
654         return 0;
655 }
656
657 static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
658 {
659         struct mlx4_ib_dev *dev = to_mdev(context->device);
660
661         if (vma->vm_end - vma->vm_start != PAGE_SIZE)
662                 return -EINVAL;
663
664         if (vma->vm_pgoff == 0) {
665                 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
666
667                 if (io_remap_pfn_range(vma, vma->vm_start,
668                                        to_mucontext(context)->uar.pfn,
669                                        PAGE_SIZE, vma->vm_page_prot))
670                         return -EAGAIN;
671         } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
672                 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
673
674                 if (io_remap_pfn_range(vma, vma->vm_start,
675                                        to_mucontext(context)->uar.pfn +
676                                        dev->dev->caps.num_uars,
677                                        PAGE_SIZE, vma->vm_page_prot))
678                         return -EAGAIN;
679         } else
680                 return -EINVAL;
681
682         return 0;
683 }
684
685 static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
686                                       struct ib_ucontext *context,
687                                       struct ib_udata *udata)
688 {
689         struct mlx4_ib_pd *pd;
690         int err;
691
692         pd = kmalloc(sizeof *pd, GFP_KERNEL);
693         if (!pd)
694                 return ERR_PTR(-ENOMEM);
695
696         err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
697         if (err) {
698                 kfree(pd);
699                 return ERR_PTR(err);
700         }
701
702         if (context)
703                 if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
704                         mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
705                         kfree(pd);
706                         return ERR_PTR(-EFAULT);
707                 }
708
709         return &pd->ibpd;
710 }
711
712 static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
713 {
714         mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
715         kfree(pd);
716
717         return 0;
718 }
719
720 static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
721                                           struct ib_ucontext *context,
722                                           struct ib_udata *udata)
723 {
724         struct mlx4_ib_xrcd *xrcd;
725         int err;
726
727         if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
728                 return ERR_PTR(-ENOSYS);
729
730         xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
731         if (!xrcd)
732                 return ERR_PTR(-ENOMEM);
733
734         err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
735         if (err)
736                 goto err1;
737
738         xrcd->pd = ib_alloc_pd(ibdev);
739         if (IS_ERR(xrcd->pd)) {
740                 err = PTR_ERR(xrcd->pd);
741                 goto err2;
742         }
743
744         xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
745         if (IS_ERR(xrcd->cq)) {
746                 err = PTR_ERR(xrcd->cq);
747                 goto err3;
748         }
749
750         return &xrcd->ibxrcd;
751
752 err3:
753         ib_dealloc_pd(xrcd->pd);
754 err2:
755         mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
756 err1:
757         kfree(xrcd);
758         return ERR_PTR(err);
759 }
760
761 static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
762 {
763         ib_destroy_cq(to_mxrcd(xrcd)->cq);
764         ib_dealloc_pd(to_mxrcd(xrcd)->pd);
765         mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
766         kfree(xrcd);
767
768         return 0;
769 }
770
771 static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
772 {
773         struct mlx4_ib_qp *mqp = to_mqp(ibqp);
774         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
775         struct mlx4_ib_gid_entry *ge;
776
777         ge = kzalloc(sizeof *ge, GFP_KERNEL);
778         if (!ge)
779                 return -ENOMEM;
780
781         ge->gid = *gid;
782         if (mlx4_ib_add_mc(mdev, mqp, gid)) {
783                 ge->port = mqp->port;
784                 ge->added = 1;
785         }
786
787         mutex_lock(&mqp->mutex);
788         list_add_tail(&ge->list, &mqp->gid_list);
789         mutex_unlock(&mqp->mutex);
790
791         return 0;
792 }
793
794 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
795                    union ib_gid *gid)
796 {
797         u8 mac[6];
798         struct net_device *ndev;
799         int ret = 0;
800
801         if (!mqp->port)
802                 return 0;
803
804         spin_lock(&mdev->iboe.lock);
805         ndev = mdev->iboe.netdevs[mqp->port - 1];
806         if (ndev)
807                 dev_hold(ndev);
808         spin_unlock(&mdev->iboe.lock);
809
810         if (ndev) {
811                 rdma_get_mcast_mac((struct in6_addr *)gid, mac);
812                 rtnl_lock();
813                 dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac);
814                 ret = 1;
815                 rtnl_unlock();
816                 dev_put(ndev);
817         }
818
819         return ret;
820 }
821
822 struct mlx4_ib_steering {
823         struct list_head list;
824         u64 reg_id;
825         union ib_gid gid;
826 };
827
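/*
 * Translate a single ib_flow_spec into the hardware steering rule format
 * (struct _rule_hw).  Returns the size in bytes of the hardware spec, or
 * -EINVAL if the spec type is not supported by this device.
 */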
828 static int parse_flow_attr(struct mlx4_dev *dev,
829                            u32 qp_num,
830                            union ib_flow_spec *ib_spec,
831                            struct _rule_hw *mlx4_spec)
832 {
833         enum mlx4_net_trans_rule_id type;
834
835         switch (ib_spec->type) {
836         case IB_FLOW_SPEC_ETH:
837                 type = MLX4_NET_TRANS_RULE_ID_ETH;
838                 memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
839                        ETH_ALEN);
840                 memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
841                        ETH_ALEN);
842                 mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
843                 mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
844                 break;
845         case IB_FLOW_SPEC_IB:
846                 type = MLX4_NET_TRANS_RULE_ID_IB;
847                 mlx4_spec->ib.l3_qpn =
848                         cpu_to_be32(qp_num);
849                 mlx4_spec->ib.qpn_mask =
850                         cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
851                 break;
852
853
854         case IB_FLOW_SPEC_IPV4:
855                 type = MLX4_NET_TRANS_RULE_ID_IPV4;
856                 mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
857                 mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
858                 mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
859                 mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
860                 break;
861
862         case IB_FLOW_SPEC_TCP:
863         case IB_FLOW_SPEC_UDP:
864                 type = ib_spec->type == IB_FLOW_SPEC_TCP ?
865                                         MLX4_NET_TRANS_RULE_ID_TCP :
866                                         MLX4_NET_TRANS_RULE_ID_UDP;
867                 mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
868                 mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
869                 mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
870                 mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
871                 break;
872
873         default:
874                 return -EINVAL;
875         }
876         if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
877             mlx4_hw_rule_sz(dev, type) < 0)
878                 return -EINVAL;
879         mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
880         mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
881         return mlx4_hw_rule_sz(dev, type);
882 }
883
884 struct default_rules {
885         __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
886         __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
887         __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
888         __u8  link_layer;
889 };
890 static const struct default_rules default_table[] = {
891         {
892                 .mandatory_fields = {IB_FLOW_SPEC_IPV4},
893                 .mandatory_not_fields = {IB_FLOW_SPEC_ETH},
894                 .rules_create_list = {IB_FLOW_SPEC_IB},
895                 .link_layer = IB_LINK_LAYER_INFINIBAND
896         }
897 };
898
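/*
 * Match a user flow attribute against default_table.  Returns the index of
 * the matching entry (whose rules_create_list is then added as extra
 * default rules), or -1 if no default rule applies.
 */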
899 static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
900                                          struct ib_flow_attr *flow_attr)
901 {
902         int i, j, k;
903         void *ib_flow;
904         const struct default_rules *pdefault_rules = default_table;
905         u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
906
907         for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
908              pdefault_rules++) {
909                 __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
910                 memset(&field_types, 0, sizeof(field_types));
911
912                 if (link_layer != pdefault_rules->link_layer)
913                         continue;
914
915                 ib_flow = flow_attr + 1;
916                 /* we assume the specs are sorted */
917                 for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
918                      j < flow_attr->num_of_specs; k++) {
919                         union ib_flow_spec *current_flow =
920                                 (union ib_flow_spec *)ib_flow;
921
922                         /* same layer but different type */
923                         if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
924                              (pdefault_rules->mandatory_fields[k] &
925                               IB_FLOW_SPEC_LAYER_MASK)) &&
926                             (current_flow->type !=
927                              pdefault_rules->mandatory_fields[k]))
928                                 goto out;
929
930                         /* same layer, try match next one */
931                         if (current_flow->type ==
932                             pdefault_rules->mandatory_fields[k]) {
933                                 j++;
934                                 ib_flow +=
935                                         ((union ib_flow_spec *)ib_flow)->size;
936                         }
937                 }
938
939                 ib_flow = flow_attr + 1;
940                 for (j = 0; j < flow_attr->num_of_specs;
941                      j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
942                         for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
943                                 /* same layer and same type */
944                                 if (((union ib_flow_spec *)ib_flow)->type ==
945                                     pdefault_rules->mandatory_not_fields[k])
946                                         goto out;
947
948                 return i;
949         }
950 out:
951         return -1;
952 }
953
954 static int __mlx4_ib_create_default_rules(
955                 struct mlx4_ib_dev *mdev,
956                 struct ib_qp *qp,
957                 const struct default_rules *pdefault_rules,
958                 struct _rule_hw *mlx4_spec) {
959         int size = 0;
960         int i;
961
962         for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
963                         sizeof(pdefault_rules->rules_create_list[0]); i++) {
964                 int ret;
965                 union ib_flow_spec ib_spec;
966                 switch (pdefault_rules->rules_create_list[i]) {
967                 case 0:
968                         /* no rule */
969                         continue;
970                 case IB_FLOW_SPEC_IB:
971                         ib_spec.type = IB_FLOW_SPEC_IB;
972                         ib_spec.size = sizeof(struct ib_flow_spec_ib);
973
974                         break;
975                 default:
976                         /* invalid rule */
977                         return -EINVAL;
978                 }
979                 /* We must put an empty rule; the qpn is ignored */
980                 ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
981                                       mlx4_spec);
982                 if (ret < 0) {
983                         pr_info("invalid parsing\n");
984                         return -EINVAL;
985                 }
986
987                 mlx4_spec = (void *)mlx4_spec + ret;
988                 size += ret;
989         }
990         return size;
991 }
992
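/*
 * Build the hardware rule (control segment plus default and user specs) in
 * a command mailbox and attach it with MLX4_QP_FLOW_STEERING_ATTACH.  On
 * success the firmware-assigned registration id is returned in reg_id and
 * is later used to detach the rule.
 */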
993 static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
994                           int domain,
995                           enum mlx4_net_trans_promisc_mode flow_type,
996                           u64 *reg_id)
997 {
998         int ret, i;
999         int size = 0;
1000         void *ib_flow;
1001         struct mlx4_ib_dev *mdev = to_mdev(qp->device);
1002         struct mlx4_cmd_mailbox *mailbox;
1003         struct mlx4_net_trans_rule_hw_ctrl *ctrl;
1004         int default_flow;
1005
1006         static const u16 __mlx4_domain[] = {
1007                 [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
1008                 [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
1009                 [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
1010                 [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
1011         };
1012
1013         if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
1014                 pr_err("Invalid priority value %d\n", flow_attr->priority);
1015                 return -EINVAL;
1016         }
1017
1018         if (domain >= IB_FLOW_DOMAIN_NUM) {
1019                 pr_err("Invalid domain value %d\n", domain);
1020                 return -EINVAL;
1021         }
1022
1023         if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
1024                 return -EINVAL;
1025
1026         mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
1027         if (IS_ERR(mailbox))
1028                 return PTR_ERR(mailbox);
1029         ctrl = mailbox->buf;
1030
1031         ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
1032                                  flow_attr->priority);
1033         ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
1034         ctrl->port = flow_attr->port;
1035         ctrl->qpn = cpu_to_be32(qp->qp_num);
1036
1037         ib_flow = flow_attr + 1;
1038         size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
1039         /* Add default flows */
1040         default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
1041         if (default_flow >= 0) {
1042                 ret = __mlx4_ib_create_default_rules(
1043                                 mdev, qp, default_table + default_flow,
1044                                 mailbox->buf + size);
1045                 if (ret < 0) {
1046                         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1047                         return -EINVAL;
1048                 }
1049                 size += ret;
1050         }
1051         for (i = 0; i < flow_attr->num_of_specs; i++) {
1052                 ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
1053                                       mailbox->buf + size);
1054                 if (ret < 0) {
1055                         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1056                         return -EINVAL;
1057                 }
1058                 ib_flow += ((union ib_flow_spec *) ib_flow)->size;
1059                 size += ret;
1060         }
1061
1062         ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
1063                            MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
1064                            MLX4_CMD_NATIVE);
1065         if (ret == -ENOMEM)
1066                 pr_err("mcg table is full. Failed to register network rule.\n");
1067         else if (ret == -ENXIO)
1068                 pr_err("Device managed flow steering is disabled. Failed to register network rule.\n");
1069         else if (ret)
1070                 pr_err("Invalid argument. Failed to register network rule.\n");
1071
1072         mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1073         return ret;
1074 }
1075
1076 static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
1077 {
1078         int err;
1079         err = mlx4_cmd(dev, reg_id, 0, 0,
1080                        MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
1081                        MLX4_CMD_NATIVE);
1082         if (err)
1083                 pr_err("Failed to detach network rule. registration id = 0x%llx\n",
1084                        reg_id);
1085         return err;
1086 }
1087
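/*
 * ib_create_flow entry point: map the flow attribute type to one or two
 * mlx4 rule types (sniffer rules need both the UC and MC sniffer modes)
 * and create each underlying hardware rule.
 */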
1088 static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
1089                                     struct ib_flow_attr *flow_attr,
1090                                     int domain)
1091 {
1092         int err = 0, i = 0;
1093         struct mlx4_ib_flow *mflow;
1094         enum mlx4_net_trans_promisc_mode type[2];
1095
1096         memset(type, 0, sizeof(type));
1097
1098         mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
1099         if (!mflow) {
1100                 err = -ENOMEM;
1101                 goto err_free;
1102         }
1103
1104         switch (flow_attr->type) {
1105         case IB_FLOW_ATTR_NORMAL:
1106                 type[0] = MLX4_FS_REGULAR;
1107                 break;
1108
1109         case IB_FLOW_ATTR_ALL_DEFAULT:
1110                 type[0] = MLX4_FS_ALL_DEFAULT;
1111                 break;
1112
1113         case IB_FLOW_ATTR_MC_DEFAULT:
1114                 type[0] = MLX4_FS_MC_DEFAULT;
1115                 break;
1116
1117         case IB_FLOW_ATTR_SNIFFER:
1118                 type[0] = MLX4_FS_UC_SNIFFER;
1119                 type[1] = MLX4_FS_MC_SNIFFER;
1120                 break;
1121
1122         default:
1123                 err = -EINVAL;
1124                 goto err_free;
1125         }
1126
1127         while (i < ARRAY_SIZE(type) && type[i]) {
1128                 err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
1129                                             &mflow->reg_id[i]);
1130                 if (err)
1131                         goto err_free;
1132                 i++;
1133         }
1134
1135         return &mflow->ibflow;
1136
1137 err_free:
1138         kfree(mflow);
1139         return ERR_PTR(err);
1140 }
1141
1142 static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
1143 {
1144         int err, ret = 0;
1145         int i = 0;
1146         struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
1147         struct mlx4_ib_flow *mflow = to_mflow(flow_id);
1148
1149         while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
1150                 err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
1151                 if (err)
1152                         ret = err;
1153                 i++;
1154         }
1155
1156         kfree(mflow);
1157         return ret;
1158 }
1159
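/*
 * Attach a QP to a multicast group: register the GID with the firmware,
 * add a gid_list entry (and, for RoCE, the corresponding multicast MAC on
 * the netdev), and remember the steering registration id when DMFS is in
 * use.
 */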
1160 static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1161 {
1162         int err;
1163         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1164         struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1165         u64 reg_id;
1166         struct mlx4_ib_steering *ib_steering = NULL;
1167
1168         if (mdev->dev->caps.steering_mode ==
1169             MLX4_STEERING_MODE_DEVICE_MANAGED) {
1170                 ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
1171                 if (!ib_steering)
1172                         return -ENOMEM;
1173         }
1174
1175         err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
1176                                     !!(mqp->flags &
1177                                        MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1178                                     MLX4_PROT_IB_IPV6, &reg_id);
1179         if (err)
1180                 goto err_malloc;
1181
1182         err = add_gid_entry(ibqp, gid);
1183         if (err)
1184                 goto err_add;
1185
1186         if (ib_steering) {
1187                 memcpy(ib_steering->gid.raw, gid->raw, 16);
1188                 ib_steering->reg_id = reg_id;
1189                 mutex_lock(&mqp->mutex);
1190                 list_add(&ib_steering->list, &mqp->steering_rules);
1191                 mutex_unlock(&mqp->mutex);
1192         }
1193         return 0;
1194
1195 err_add:
1196         mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1197                               MLX4_PROT_IB_IPV6, reg_id);
1198 err_malloc:
1199         kfree(ib_steering);
1200
1201         return err;
1202 }
1203
1204 static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
1205 {
1206         struct mlx4_ib_gid_entry *ge;
1207         struct mlx4_ib_gid_entry *tmp;
1208         struct mlx4_ib_gid_entry *ret = NULL;
1209
1210         list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
1211                 if (!memcmp(raw, ge->gid.raw, 16)) {
1212                         ret = ge;
1213                         break;
1214                 }
1215         }
1216
1217         return ret;
1218 }
1219
1220 static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1221 {
1222         int err;
1223         struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1224         struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1225         u8 mac[6];
1226         struct net_device *ndev;
1227         struct mlx4_ib_gid_entry *ge;
1228         u64 reg_id = 0;
1229
1230         if (mdev->dev->caps.steering_mode ==
1231             MLX4_STEERING_MODE_DEVICE_MANAGED) {
1232                 struct mlx4_ib_steering *ib_steering;
1233
1234                 mutex_lock(&mqp->mutex);
1235                 list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
1236                         if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
1237                                 list_del(&ib_steering->list);
1238                                 break;
1239                         }
1240                 }
1241                 mutex_unlock(&mqp->mutex);
1242                 if (&ib_steering->list == &mqp->steering_rules) {
1243                         pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
1244                         return -EINVAL;
1245                 }
1246                 reg_id = ib_steering->reg_id;
1247                 kfree(ib_steering);
1248         }
1249
1250         err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1251                                     MLX4_PROT_IB_IPV6, reg_id);
1252         if (err)
1253                 return err;
1254
1255         mutex_lock(&mqp->mutex);
1256         ge = find_gid_entry(mqp, gid->raw);
1257         if (ge) {
1258                 spin_lock(&mdev->iboe.lock);
1259                 ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
1260                 if (ndev)
1261                         dev_hold(ndev);
1262                 spin_unlock(&mdev->iboe.lock);
1263                 rdma_get_mcast_mac((struct in6_addr *)gid, mac);
1264                 if (ndev) {
1265                         rtnl_lock();
1266                         dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac);
1267                         rtnl_unlock();
1268                         dev_put(ndev);
1269                 }
1270                 list_del(&ge->list);
1271                 kfree(ge);
1272         } else
1273                 pr_warn("could not find mgid entry\n");
1274
1275         mutex_unlock(&mqp->mutex);
1276
1277         return 0;
1278 }
1279
1280 static int init_node_data(struct mlx4_ib_dev *dev)
1281 {
1282         struct ib_smp *in_mad  = NULL;
1283         struct ib_smp *out_mad = NULL;
1284         int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
1285         int err = -ENOMEM;
1286
1287         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
1288         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
1289         if (!in_mad || !out_mad)
1290                 goto out;
1291
1292         init_query_mad(in_mad);
1293         in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
1294         if (mlx4_is_master(dev->dev))
1295                 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
1296
1297         err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
1298         if (err)
1299                 goto out;
1300
1301         memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
1302
1303         in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
1304
1305         err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
1306         if (err)
1307                 goto out;
1308
1309         dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
1310         memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
1311
1312 out:
1313         kfree(in_mad);
1314         kfree(out_mad);
1315         return err;
1316 }
1317
1318 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1319                         char *buf)
1320 {
1321         struct mlx4_ib_dev *dev =
1322                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
1323         return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
1324 }
1325
1326 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1327                            char *buf)
1328 {
1329         struct mlx4_ib_dev *dev =
1330                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
1331         return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32),
1332                        (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
1333                        (int) dev->dev->caps.fw_ver & 0xffff);
1334 }
1335
1336 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1337                         char *buf)
1338 {
1339         struct mlx4_ib_dev *dev =
1340                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
1341         return sprintf(buf, "%x\n", dev->dev->rev_id);
1342 }
1343
1344 static ssize_t show_board(struct device *device, struct device_attribute *attr,
1345                           char *buf)
1346 {
1347         struct mlx4_ib_dev *dev =
1348                 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
1349         return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
1350                        dev->dev->board_id);
1351 }
1352
1353 static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
1354 static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
1355 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
1356 static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
1357
1358 static struct device_attribute *mlx4_class_attributes[] = {
1359         &dev_attr_hw_rev,
1360         &dev_attr_fw_ver,
1361         &dev_attr_hca_type,
1362         &dev_attr_board_id
1363 };
1364
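/*
 * Build the modified EUI-64 interface identifier for a RoCE GID from the
 * netdev MAC address, encoding the VLAN id (or 0xff:0xfe when untagged) in
 * the middle bytes.
 */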
1365 static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
1366 {
1367         memcpy(eui, dev->dev_addr, 3);
1368         memcpy(eui + 5, dev->dev_addr + 3, 3);
1369         if (vlan_id < 0x1000) {
1370                 eui[3] = vlan_id >> 8;
1371                 eui[4] = vlan_id & 0xff;
1372         } else {
1373                 eui[3] = 0xff;
1374                 eui[4] = 0xfe;
1375         }
1376         eui[0] ^= 2;
1377 }
1378
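/*
 * Deferred work: write the updated GID table to the firmware with SET_PORT
 * and dispatch an IB_EVENT_GID_CHANGE event on success.
 */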
1379 static void update_gids_task(struct work_struct *work)
1380 {
1381         struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
1382         struct mlx4_cmd_mailbox *mailbox;
1383         union ib_gid *gids;
1384         int err;
1385         struct mlx4_dev *dev = gw->dev->dev;
1386
1387         mailbox = mlx4_alloc_cmd_mailbox(dev);
1388         if (IS_ERR(mailbox)) {
1389                 pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
1390                 return;
1391         }
1392
1393         gids = mailbox->buf;
1394         memcpy(gids, gw->gids, sizeof gw->gids);
1395
1396         err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
1397                        1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
1398                        MLX4_CMD_WRAPPED);
1399         if (err)
1400                 pr_warn("set port command failed\n");
1401         else {
1402                 memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
1403                 mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
1404         }
1405
1406         mlx4_free_cmd_mailbox(dev, mailbox);
1407         kfree(gw);
1408 }
1409
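/*
 * Rebuild the RoCE GID table for a port from the link-local GIDs of the
 * port's net_device and its VLAN devices; if anything changed, queue
 * update_gids_task() to push the new table to the firmware.
 */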
1410 static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
1411 {
1412         struct net_device *ndev = dev->iboe.netdevs[port - 1];
1413         struct update_gid_work *work;
1414         struct net_device *tmp;
1415         int i;
1416         u8 *hits;
1417         int ret;
1418         union ib_gid gid;
1419         int free;
1420         int found;
1421         int need_update = 0;
1422         u16 vid;
1423
1424         work = kzalloc(sizeof *work, GFP_ATOMIC);
1425         if (!work)
1426                 return -ENOMEM;
1427
1428         hits = kzalloc(128, GFP_ATOMIC);
1429         if (!hits) {
1430                 ret = -ENOMEM;
1431                 goto out;
1432         }
1433
1434         rcu_read_lock();
1435         for_each_netdev_rcu(&init_net, tmp) {
1436                 if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
1437                         gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
1438                         vid = rdma_vlan_dev_vlan_id(tmp);
1439                         mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
1440                         found = 0;
1441                         free = -1;
1442                         for (i = 0; i < 128; ++i) {
1443                                 if (free < 0 &&
1444                                     !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
1445                                         free = i;
1446                                 if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
1447                                         hits[i] = 1;
1448                                         found = 1;
1449                                         break;
1450                                 }
1451                         }
1452
1453                         if (!found) {
1454                                 if (tmp == ndev &&
1455                                     (memcmp(&dev->iboe.gid_table[port - 1][0],
1456                                             &gid, sizeof gid) ||
1457                                      !memcmp(&dev->iboe.gid_table[port - 1][0],
1458                                              &zgid, sizeof gid))) {
1459                                         dev->iboe.gid_table[port - 1][0] = gid;
1460                                         ++need_update;
1461                                         hits[0] = 1;
1462                                 } else if (free >= 0) {
1463                                         dev->iboe.gid_table[port - 1][free] = gid;
1464                                         hits[free] = 1;
1465                                         ++need_update;
1466                                 }
1467                         }
1468                 }
1469         }
1470         rcu_read_unlock();
1471
1472         for (i = 0; i < 128; ++i)
1473                 if (!hits[i]) {
1474                         if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
1475                                 ++need_update;
1476                         dev->iboe.gid_table[port - 1][i] = zgid;
1477                 }
1478
1479         if (need_update) {
1480                 memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
1481                 INIT_WORK(&work->work, update_gids_task);
1482                 work->port = port;
1483                 work->dev = dev;
1484                 queue_work(wq, &work->work);
1485         } else
1486                 kfree(work);
1487
1488         kfree(hits);
1489         return 0;
1490
1491 out:
1492         kfree(work);
1493         return ret;
1494 }
1495
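/*
 * React to netdev state changes on a RoCE port: refresh the GID table on
 * UP and CHANGEADDR, and drop the cached netdev pointer on DOWN.
 */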
1496 static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
1497 {
1498         switch (event) {
1499         case NETDEV_UP:
1500         case NETDEV_CHANGEADDR:
1501                 update_ipv6_gids(dev, port, 0);
1502                 break;
1503
1504         case NETDEV_DOWN:
1505                 update_ipv6_gids(dev, port, 1);
1506                 dev->iboe.netdevs[port - 1] = NULL;
1507         }
1508 }
1509
1510 static void netdev_added(struct mlx4_ib_dev *dev, int port)
1511 {
1512         update_ipv6_gids(dev, port, 0);
1513 }
1514
1515 static void netdev_removed(struct mlx4_ib_dev *dev, int port)
1516 {
1517         update_ipv6_gids(dev, port, 1);
1518 }
1519
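/*
 * Netdevice notifier: re-resolve which net_device backs each IBoE port
 * and refresh the affected port's GIDs whenever the underlying Ethernet
 * device (or a VLAN device on top of it) changes state.
 */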
1520 static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
1521                                 void *ptr)
1522 {
1523         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1524         struct mlx4_ib_dev *ibdev;
1525         struct net_device *oldnd;
1526         struct mlx4_ib_iboe *iboe;
1527         int port;
1528
1529         if (!net_eq(dev_net(dev), &init_net))
1530                 return NOTIFY_DONE;
1531
1532         ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
1533         iboe = &ibdev->iboe;
1534
1535         spin_lock(&iboe->lock);
1536         mlx4_foreach_ib_transport_port(port, ibdev->dev) {
1537                 oldnd = iboe->netdevs[port - 1];
1538                 iboe->netdevs[port - 1] =
1539                         mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
1540                 if (oldnd != iboe->netdevs[port - 1]) {
1541                         if (iboe->netdevs[port - 1])
1542                                 netdev_added(ibdev, port);
1543                         else
1544                                 netdev_removed(ibdev, port);
1545                 }
1546         }
1547
1548         if (dev == iboe->netdevs[0] ||
1549             (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0]))
1550                 handle_en_event(ibdev, 1, event);
1551         else if (dev == iboe->netdevs[1]
1552                  || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1]))
1553                 handle_en_event(ibdev, 2, event);
1554
1555         spin_unlock(&iboe->lock);
1556
1557         return NOTIFY_DONE;
1558 }
1559
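/*
 * SRIOV master only: install an initial virt-to-phys P_Key mapping for
 * every function (identity for the master and for index 0, the last
 * table entry otherwise) and seed the physical P_Key cache with the
 * default 0xFFFF key at index 0.
 */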
1560 static void init_pkeys(struct mlx4_ib_dev *ibdev)
1561 {
1562         int port;
1563         int slave;
1564         int i;
1565
1566         if (mlx4_is_master(ibdev->dev)) {
1567                 for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
1568                         for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1569                                 for (i = 0;
1570                                      i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1571                                      ++i) {
1572                                         /* master has the identity virt2phys pkey mapping */
1573                                         ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
1574                                                 (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
1575                                                         ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
1576                                         mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
1577                                                              ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
1578                                 }
1579                         }
1580                 }
1581                 /* initialize pkey cache */
1582                 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1583                         for (i = 0;
1584                              i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1585                              ++i)
1586                                 ibdev->pkeys.phys_pkey_cache[port-1][i] =
1587                                         (i) ? 0 : 0xFFFF;
1588                 }
1589         }
1590 }
1591
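/*
 * Try to assign a dedicated, power-of-two sized group of completion EQs
 * to each IB port on top of the legacy vectors, and advertise the larger
 * vector count to ULPs via ib_dev.num_comp_vectors.  When the EQ pool is
 * absent or too small, the legacy vectors are used unchanged.
 */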
1592 static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1593 {
1594         char name[32];
1595         int eq_per_port = 0;
1596         int added_eqs = 0;
1597         int total_eqs = 0;
1598         int i, j, eq;
1599
1600         /* Legacy mode or comp_pool is not large enough */
1601         if (dev->caps.comp_pool == 0 ||
1602             dev->caps.num_ports > dev->caps.comp_pool)
1603                 return;
1604
1605         eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
1606                                         dev->caps.num_ports);
1607
1608         /* Init eq table */
1609         added_eqs = 0;
1610         mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
1611                 added_eqs += eq_per_port;
1612
1613         total_eqs = dev->caps.num_comp_vectors + added_eqs;
1614
1615         ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL);
1616         if (!ibdev->eq_table)
1617                 return;
1618
1619         ibdev->eq_added = added_eqs;
1620
1621         eq = 0;
1622         mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
1623                 for (j = 0; j < eq_per_port; j++) {
1624                         snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
1625                                  i, j, dev->pdev->bus->name);
1626                         /* Set IRQ for specific name (per ring) */
1627                         if (mlx4_assign_eq(dev, name, NULL,
1628                                            &ibdev->eq_table[eq])) {
1629                                 /* Use legacy (same as mlx4_en driver) */
1630                                 pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
1631                                 ibdev->eq_table[eq] =
1632                                         (eq % dev->caps.num_comp_vectors);
1633                         }
1634                         eq++;
1635                 }
1636         }
1637
1638         /* Fill the rest of the vector with legacy EQs */
1639         for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++)
1640                 ibdev->eq_table[eq++] = i;
1641
1642         /* Advertise the new number of EQs to clients */
1643         ibdev->ib_dev.num_comp_vectors = total_eqs;
1644 }
1645
1646 static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1647 {
1648         int i;
1649
1650         /* no additional eqs were added */
1651         if (!ibdev->eq_table)
1652                 return;
1653
1654         /* Reset the advertised EQ number */
1655         ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
1656
1657         /* Free only the added eqs */
1658         for (i = 0; i < ibdev->eq_added; i++) {
1659                 /* Don't free legacy eqs if used */
1660                 if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors)
1661                         continue;
1662                 mlx4_release_eq(dev, ibdev->eq_table[i]);
1663         }
1664
1665         kfree(ibdev->eq_table);
1666 }
1667
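/*
 * mlx4 core "add" callback: allocate and populate the IB device, wire up
 * the verbs entry points according to the device caps (FMR, memory
 * windows, XRC, device-managed flow steering), reserve the steerable QP
 * number range, and bring up MAD handling, SRIOV support, the netdev
 * notifier and the sysfs attributes before marking the device active.
 */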
1668 static void *mlx4_ib_add(struct mlx4_dev *dev)
1669 {
1670         struct mlx4_ib_dev *ibdev;
1671         int num_ports = 0;
1672         int i, j;
1673         int err;
1674         struct mlx4_ib_iboe *iboe;
1675
1676         pr_info_once("%s", mlx4_ib_version);
1677
1678         mlx4_foreach_non_ib_transport_port(i, dev)
1679                 num_ports++;
1680
1681         if (mlx4_is_mfunc(dev) && num_ports) {
1682                 dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n");
1683                 return NULL;
1684         }
1685
1686         num_ports = 0;
1687         mlx4_foreach_ib_transport_port(i, dev)
1688                 num_ports++;
1689
1690         /* No point in registering a device with no ports... */
1691         if (num_ports == 0)
1692                 return NULL;
1693
1694         ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
1695         if (!ibdev) {
1696                 dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
1697                 return NULL;
1698         }
1699
1700         iboe = &ibdev->iboe;
1701
1702         if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
1703                 goto err_dealloc;
1704
1705         if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
1706                 goto err_pd;
1707
1708         ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
1709                                  PAGE_SIZE);
1710         if (!ibdev->uar_map)
1711                 goto err_uar;
1712         MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
1713
1714         ibdev->dev = dev;
1715
1716         strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
1717         ibdev->ib_dev.owner             = THIS_MODULE;
1718         ibdev->ib_dev.node_type         = RDMA_NODE_IB_CA;
1719         ibdev->ib_dev.local_dma_lkey    = dev->caps.reserved_lkey;
1720         ibdev->num_ports                = num_ports;
1721         ibdev->ib_dev.phys_port_cnt     = ibdev->num_ports;
1722         ibdev->ib_dev.num_comp_vectors  = dev->caps.num_comp_vectors;
1723         ibdev->ib_dev.dma_device        = &dev->pdev->dev;
1724
1725         if (dev->caps.userspace_caps)
1726                 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
1727         else
1728                 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
1729
1730         ibdev->ib_dev.uverbs_cmd_mask   =
1731                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
1732                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
1733                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
1734                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
1735                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
1736                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
1737                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
1738                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
1739                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
1740                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
1741                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
1742                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
1743                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
1744                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
1745                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
1746                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
1747                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
1748                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
1749                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
1750                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
1751                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
1752                 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)         |
1753                 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
1754
1755         ibdev->ib_dev.query_device      = mlx4_ib_query_device;
1756         ibdev->ib_dev.query_port        = mlx4_ib_query_port;
1757         ibdev->ib_dev.get_link_layer    = mlx4_ib_port_link_layer;
1758         ibdev->ib_dev.query_gid         = mlx4_ib_query_gid;
1759         ibdev->ib_dev.query_pkey        = mlx4_ib_query_pkey;
1760         ibdev->ib_dev.modify_device     = mlx4_ib_modify_device;
1761         ibdev->ib_dev.modify_port       = mlx4_ib_modify_port;
1762         ibdev->ib_dev.alloc_ucontext    = mlx4_ib_alloc_ucontext;
1763         ibdev->ib_dev.dealloc_ucontext  = mlx4_ib_dealloc_ucontext;
1764         ibdev->ib_dev.mmap              = mlx4_ib_mmap;
1765         ibdev->ib_dev.alloc_pd          = mlx4_ib_alloc_pd;
1766         ibdev->ib_dev.dealloc_pd        = mlx4_ib_dealloc_pd;
1767         ibdev->ib_dev.create_ah         = mlx4_ib_create_ah;
1768         ibdev->ib_dev.query_ah          = mlx4_ib_query_ah;
1769         ibdev->ib_dev.destroy_ah        = mlx4_ib_destroy_ah;
1770         ibdev->ib_dev.create_srq        = mlx4_ib_create_srq;
1771         ibdev->ib_dev.modify_srq        = mlx4_ib_modify_srq;
1772         ibdev->ib_dev.query_srq         = mlx4_ib_query_srq;
1773         ibdev->ib_dev.destroy_srq       = mlx4_ib_destroy_srq;
1774         ibdev->ib_dev.post_srq_recv     = mlx4_ib_post_srq_recv;
1775         ibdev->ib_dev.create_qp         = mlx4_ib_create_qp;
1776         ibdev->ib_dev.modify_qp         = mlx4_ib_modify_qp;
1777         ibdev->ib_dev.query_qp          = mlx4_ib_query_qp;
1778         ibdev->ib_dev.destroy_qp        = mlx4_ib_destroy_qp;
1779         ibdev->ib_dev.post_send         = mlx4_ib_post_send;
1780         ibdev->ib_dev.post_recv         = mlx4_ib_post_recv;
1781         ibdev->ib_dev.create_cq         = mlx4_ib_create_cq;
1782         ibdev->ib_dev.modify_cq         = mlx4_ib_modify_cq;
1783         ibdev->ib_dev.resize_cq         = mlx4_ib_resize_cq;
1784         ibdev->ib_dev.destroy_cq        = mlx4_ib_destroy_cq;
1785         ibdev->ib_dev.poll_cq           = mlx4_ib_poll_cq;
1786         ibdev->ib_dev.req_notify_cq     = mlx4_ib_arm_cq;
1787         ibdev->ib_dev.get_dma_mr        = mlx4_ib_get_dma_mr;
1788         ibdev->ib_dev.reg_user_mr       = mlx4_ib_reg_user_mr;
1789         ibdev->ib_dev.dereg_mr          = mlx4_ib_dereg_mr;
1790         ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
1791         ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
1792         ibdev->ib_dev.free_fast_reg_page_list  = mlx4_ib_free_fast_reg_page_list;
1793         ibdev->ib_dev.attach_mcast      = mlx4_ib_mcg_attach;
1794         ibdev->ib_dev.detach_mcast      = mlx4_ib_mcg_detach;
1795         ibdev->ib_dev.process_mad       = mlx4_ib_process_mad;
1796
1797         if (!mlx4_is_slave(ibdev->dev)) {
1798                 ibdev->ib_dev.alloc_fmr         = mlx4_ib_fmr_alloc;
1799                 ibdev->ib_dev.map_phys_fmr      = mlx4_ib_map_phys_fmr;
1800                 ibdev->ib_dev.unmap_fmr         = mlx4_ib_unmap_fmr;
1801                 ibdev->ib_dev.dealloc_fmr       = mlx4_ib_fmr_dealloc;
1802         }
1803
1804         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
1805             dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
1806                 ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
1807                 ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
1808                 ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
1809
1810                 ibdev->ib_dev.uverbs_cmd_mask |=
1811                         (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
1812                         (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
1813         }
1814
1815         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
1816                 ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
1817                 ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
1818                 ibdev->ib_dev.uverbs_cmd_mask |=
1819                         (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
1820                         (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
1821         }
1822
1823         if (check_flow_steering_support(dev)) {
1824                 ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
1825                 ibdev->ib_dev.create_flow       = mlx4_ib_create_flow;
1826                 ibdev->ib_dev.destroy_flow      = mlx4_ib_destroy_flow;
1827
1828                 ibdev->ib_dev.uverbs_ex_cmd_mask        |=
1829                         (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
1830                         (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
1831         }
1832
1833         mlx4_ib_alloc_eqs(dev, ibdev);
1834
1835         spin_lock_init(&iboe->lock);
1836
1837         if (init_node_data(ibdev))
1838                 goto err_map;
1839
1840         for (i = 0; i < ibdev->num_ports; ++i) {
1841                 if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
1842                                                 IB_LINK_LAYER_ETHERNET) {
1843                         err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]);
1844                         if (err)
1845                                 ibdev->counters[i] = -1;
1846                 } else
1847                         ibdev->counters[i] = -1;
1848         }
1849
1850         spin_lock_init(&ibdev->sm_lock);
1851         mutex_init(&ibdev->cap_mask_mutex);
1852
1853         if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
1854                 ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
1855                 err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
1856                                             MLX4_IB_UC_STEER_QPN_ALIGN,
1857                                             &ibdev->steer_qpn_base);
1858                 if (err)
1859                         goto err_counter;
1860
1861                 ibdev->ib_uc_qpns_bitmap =
1862                         kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
1863                                 sizeof(long),
1864                                 GFP_KERNEL);
1865                 if (!ibdev->ib_uc_qpns_bitmap) {
1866                         dev_err(&dev->pdev->dev, "bit map alloc failed\n");
1867                         goto err_steer_qp_release;
1868                 }
1869
1870                 bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
1871
1872                 err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
1873                                 dev, ibdev->steer_qpn_base,
1874                                 ibdev->steer_qpn_base +
1875                                 ibdev->steer_qpn_count - 1);
1876                 if (err)
1877                         goto err_steer_free_bitmap;
1878         }
1879
1880         if (ib_register_device(&ibdev->ib_dev, NULL))
1881                 goto err_steer_free_bitmap;
1882
1883         if (mlx4_ib_mad_init(ibdev))
1884                 goto err_reg;
1885
1886         if (mlx4_ib_init_sriov(ibdev))
1887                 goto err_mad;
1888
1889         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
1890                 iboe->nb.notifier_call = mlx4_ib_netdev_event;
1891                 err = register_netdevice_notifier(&iboe->nb);
1892                 if (err)
1893                         goto err_sriov;
1894         }
1895
1896         for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
1897                 if (device_create_file(&ibdev->ib_dev.dev,
1898                                        mlx4_class_attributes[j]))
1899                         goto err_notif;
1900         }
1901
1902         ibdev->ib_active = true;
1903
1904         if (mlx4_is_mfunc(ibdev->dev))
1905                 init_pkeys(ibdev);
1906
1907         /* create paravirt contexts for any VFs which are active */
1908         if (mlx4_is_master(ibdev->dev)) {
1909                 for (j = 0; j < MLX4_MFUNC_MAX; j++) {
1910                         if (j == mlx4_master_func_num(ibdev->dev))
1911                                 continue;
1912                         if (mlx4_is_slave_active(ibdev->dev, j))
1913                                 do_slave_init(ibdev, j, 1);
1914                 }
1915         }
1916         return ibdev;
1917
1918 err_notif:
1919         if (unregister_netdevice_notifier(&ibdev->iboe.nb))
1920                 pr_warn("failure unregistering notifier\n");
1921         flush_workqueue(wq);
1922
1923 err_sriov:
1924         mlx4_ib_close_sriov(ibdev);
1925
1926 err_mad:
1927         mlx4_ib_mad_cleanup(ibdev);
1928
1929 err_reg:
1930         ib_unregister_device(&ibdev->ib_dev);
1931
1932 err_steer_free_bitmap:
1933         kfree(ibdev->ib_uc_qpns_bitmap);
1934
1935 err_steer_qp_release:
1936         if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
1937                 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
1938                                       ibdev->steer_qpn_count);
1939 err_counter:
1940         for (; i; --i)
1941                 if (ibdev->counters[i - 1] != -1)
1942                         mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]);
1943
1944 err_map:
1945         iounmap(ibdev->uar_map);
1946
1947 err_uar:
1948         mlx4_uar_free(dev, &ibdev->priv_uar);
1949
1950 err_pd:
1951         mlx4_pd_free(dev, ibdev->priv_pdn);
1952
1953 err_dealloc:
1954         ib_dealloc_device(&ibdev->ib_dev);
1955
1956         return NULL;
1957 }
1958
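/*
 * Carve an order-aligned block of 'count' QP numbers for steerable UD
 * QPs out of the range reserved in mlx4_ib_add(), using a bitmap region
 * allocator.  A hypothetical caller would pair it with
 * mlx4_ib_steer_qp_free():
 *
 *	int qpn;
 *
 *	if (!mlx4_ib_steer_qp_alloc(dev, 1, &qpn)) {
 *		... create the QP using qpn ...
 *		mlx4_ib_steer_qp_free(dev, qpn, 1);
 *	}
 */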
1959 int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
1960 {
1961         int offset;
1962
1963         WARN_ON(!dev->ib_uc_qpns_bitmap);
1964
1965         offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
1966                                          dev->steer_qpn_count,
1967                                          get_count_order(count));
1968         if (offset < 0)
1969                 return offset;
1970
1971         *qpn = dev->steer_qpn_base + offset;
1972         return 0;
1973 }
1974
1975 void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
1976 {
1977         if (!qpn ||
1978             dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
1979                 return;
1980
1981         BUG_ON(qpn < dev->steer_qpn_base);
1982
1983         bitmap_release_region(dev->ib_uc_qpns_bitmap,
1984                               qpn - dev->steer_qpn_base,
1985                               get_count_order(count));
1986 }
1987
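/*
 * Attach (is_attach != 0) or detach a catch-all rule for the QP in the
 * device-managed steering tables: a single IB_FLOW_SPEC_IB spec with an
 * all-zero mask, so UD traffic arriving on the QP's port is steered to
 * it.  The rule handle is kept in mqp->reg_id for the later detach.
 */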
1988 int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
1989                          int is_attach)
1990 {
1991         int err;
1992         size_t flow_size;
1993         struct ib_flow_attr *flow = NULL;
1994         struct ib_flow_spec_ib *ib_spec;
1995
1996         if (is_attach) {
1997                 flow_size = sizeof(struct ib_flow_attr) +
1998                             sizeof(struct ib_flow_spec_ib);
1999                 flow = kzalloc(flow_size, GFP_KERNEL);
2000                 if (!flow)
2001                         return -ENOMEM;
2002                 flow->port = mqp->port;
2003                 flow->num_of_specs = 1;
2004                 flow->size = flow_size;
2005                 ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
2006                 ib_spec->type = IB_FLOW_SPEC_IB;
2007                 ib_spec->size = sizeof(struct ib_flow_spec_ib);
2008                 /* Add an empty rule for IB L2 */
2009                 memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
2010
2011                 err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
2012                                             IB_FLOW_DOMAIN_NIC,
2013                                             MLX4_FS_REGULAR,
2014                                             &mqp->reg_id);
2015         } else {
2016                 err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
2017         }
2018         kfree(flow);
2019         return err;
2020 }
2021
2022 static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
2023 {
2024         struct mlx4_ib_dev *ibdev = ibdev_ptr;
2025         int p;
2026
2027         mlx4_ib_close_sriov(ibdev);
2028         mlx4_ib_mad_cleanup(ibdev);
2029         ib_unregister_device(&ibdev->ib_dev);
2030         if (ibdev->iboe.nb.notifier_call) {
2031                 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2032                         pr_warn("failure unregistering notifier\n");
2033                 ibdev->iboe.nb.notifier_call = NULL;
2034         }
2035
2036         if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
2037                 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2038                                       ibdev->steer_qpn_count);
2039                 kfree(ibdev->ib_uc_qpns_bitmap);
2040         }
2041
2042         iounmap(ibdev->uar_map);
2043         for (p = 0; p < ibdev->num_ports; ++p)
2044                 if (ibdev->counters[p] != -1)
2045                         mlx4_counter_free(ibdev->dev, ibdev->counters[p]);
2046         mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
2047                 mlx4_CLOSE_PORT(dev, p);
2048
2049         mlx4_ib_free_eqs(dev, ibdev);
2050
2051         mlx4_uar_free(dev, &ibdev->priv_uar);
2052         mlx4_pd_free(dev, ibdev->priv_pdn);
2053         ib_dealloc_device(&ibdev->ib_dev);
2054 }
2055
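/*
 * Queue per-port work to create (do_init != 0) or tear down the tunnel
 * QPs the master uses to proxy MADs for the given slave.  Only the SRIOV
 * master does anything here, and nothing is queued once the SRIOV layer
 * is going down.
 */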
2056 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
2057 {
2058         struct mlx4_ib_demux_work **dm = NULL;
2059         struct mlx4_dev *dev = ibdev->dev;
2060         int i;
2061         unsigned long flags;
2062
2063         if (!mlx4_is_master(dev))
2064                 return;
2065
2066         dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
2067         if (!dm) {
2068                 pr_err("failed to allocate memory for tunneling qp update\n");
2069                 goto out;
2070         }
2071
2072         for (i = 0; i < dev->caps.num_ports; i++) {
2073                 dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
2074                 if (!dm[i]) {
2075                         pr_err("failed to allocate memory for tunneling qp update work struct\n");
2076                         for (i = 0; i < dev->caps.num_ports; i++)
2077                                 kfree(dm[i]); /* kfree(NULL) is a no-op */
2080                         goto out;
2081                 }
2082         }
2083         /* initialize or tear down tunnel QPs for the slave */
2084         for (i = 0; i < dev->caps.num_ports; i++) {
2085                 INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
2086                 dm[i]->port = i + 1;
2087                 dm[i]->slave = slave;
2088                 dm[i]->do_init = do_init;
2089                 dm[i]->dev = ibdev;
2090                 spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
2091                 if (!ibdev->sriov.is_going_down)
2092                         queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
2093                 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
2094         }
2095 out:
2096         kfree(dm);
2097         return;
2098 }
2099
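/*
 * mlx4 core event handler: translate low-level device events into IB
 * events (port active/error, device fatal), hand port management change
 * EQEs to a work item (queued on the master, handled inline otherwise),
 * and bring slave tunnel QPs up or down as VFs start and stop.
 */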
2100 static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
2101                           enum mlx4_dev_event event, unsigned long param)
2102 {
2103         struct ib_event ibev;
2104         struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
2105         struct mlx4_eqe *eqe = NULL;
2106         struct ib_event_work *ew;
2107         int p = 0;
2108
2109         if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
2110                 eqe = (struct mlx4_eqe *)param;
2111         else
2112                 p = (int) param;
2113
2114         switch (event) {
2115         case MLX4_DEV_EVENT_PORT_UP:
2116                 if (p > ibdev->num_ports)
2117                         return;
2118                 if (mlx4_is_master(dev) &&
2119                     rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
2120                         IB_LINK_LAYER_INFINIBAND) {
2121                         mlx4_ib_invalidate_all_guid_record(ibdev, p);
2122                 }
2123                 ibev.event = IB_EVENT_PORT_ACTIVE;
2124                 break;
2125
2126         case MLX4_DEV_EVENT_PORT_DOWN:
2127                 if (p > ibdev->num_ports)
2128                         return;
2129                 ibev.event = IB_EVENT_PORT_ERR;
2130                 break;
2131
2132         case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
2133                 ibdev->ib_active = false;
2134                 ibev.event = IB_EVENT_DEVICE_FATAL;
2135                 break;
2136
2137         case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
2138                 ew = kmalloc(sizeof *ew, GFP_ATOMIC);
2139                 if (!ew) {
2140                         pr_err("failed to allocate memory for events work\n");
2141                         break;
2142                 }
2143
2144                 INIT_WORK(&ew->work, handle_port_mgmt_change_event);
2145                 memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
2146                 ew->ib_dev = ibdev;
2147                 /* need to queue only for port owner, which uses GEN_EQE */
2148                 if (mlx4_is_master(dev))
2149                         queue_work(wq, &ew->work);
2150                 else
2151                         handle_port_mgmt_change_event(&ew->work);
2152                 return;
2153
2154         case MLX4_DEV_EVENT_SLAVE_INIT:
2155                 /* here, p is the slave id */
2156                 do_slave_init(ibdev, p, 1);
2157                 return;
2158
2159         case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
2160                 /* here, p is the slave id */
2161                 do_slave_init(ibdev, p, 0);
2162                 return;
2163
2164         default:
2165                 return;
2166         }
2167
2168         ibev.device           = ibdev_ptr;
2169         ibev.element.port_num = (u8) p;
2170
2171         ib_dispatch_event(&ibev);
2172 }
2173
2174 static struct mlx4_interface mlx4_ib_interface = {
2175         .add            = mlx4_ib_add,
2176         .remove         = mlx4_ib_remove,
2177         .event          = mlx4_ib_event,
2178         .protocol       = MLX4_PROT_IB_IPV6
2179 };
2180
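/*
 * Module init: create the single-threaded event workqueue and initialize
 * the multicast group (MCG) paravirtualization support before
 * registering with the mlx4 core, so the add() callback can rely on
 * both.
 */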
2181 static int __init mlx4_ib_init(void)
2182 {
2183         int err;
2184
2185         wq = create_singlethread_workqueue("mlx4_ib");
2186         if (!wq)
2187                 return -ENOMEM;
2188
2189         err = mlx4_ib_mcg_init();
2190         if (err)
2191                 goto clean_wq;
2192
2193         err = mlx4_register_interface(&mlx4_ib_interface);
2194         if (err)
2195                 goto clean_mcg;
2196
2197         return 0;
2198
2199 clean_mcg:
2200         mlx4_ib_mcg_destroy();
2201
2202 clean_wq:
2203         destroy_workqueue(wq);
2204         return err;
2205 }
2206
2207 static void __exit mlx4_ib_cleanup(void)
2208 {
2209         mlx4_unregister_interface(&mlx4_ib_interface);
2210         mlx4_ib_mcg_destroy();
2211         destroy_workqueue(wq);
2212 }
2213
2214 module_init(mlx4_ib_init);
2215 module_exit(mlx4_ib_cleanup);