RDMA: Add and use rdma_for_each_port
[linux-2.6-block.git] / drivers/infiniband/core/device.c
1da177e4
LT
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
2a1d9b7f 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
1da177e4
LT
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
1da177e4
LT
32 */
33
34#include <linux/module.h>
35#include <linux/string.h>
36#include <linux/errno.h>
9a6b090c 37#include <linux/kernel.h>
1da177e4
LT
38#include <linux/slab.h>
39#include <linux/init.h>
9268f72d 40#include <linux/netdevice.h>
8f408ab6
DJ
41#include <linux/security.h>
42#include <linux/notifier.h>
b2cbae2c 43#include <rdma/rdma_netlink.h>
03db3a2d
MB
44#include <rdma/ib_addr.h>
45#include <rdma/ib_cache.h>
1da177e4
LT
46
47#include "core_priv.h"
41eda65c 48#include "restrack.h"
1da177e4
LT
49
50MODULE_AUTHOR("Roland Dreier");
51MODULE_DESCRIPTION("core kernel InfiniBand API");
52MODULE_LICENSE("Dual BSD/GPL");
53
14d3a3b2 54struct workqueue_struct *ib_comp_wq;
f794809a 55struct workqueue_struct *ib_comp_unbound_wq;
f0626710
TH
56struct workqueue_struct *ib_wq;
57EXPORT_SYMBOL_GPL(ib_wq);
58
921eab11
JG
59/*
60 * Each of the three rwsem locks (devices, clients, client_data) protects the
61 * xarray of the same name. Specifically it allows the caller to assert that
62 * the MARK will/will not be changing under the lock, and for devices and
63 * clients, that the value in the xarray is still a valid pointer. Change of
64 * the MARK is linked to the object state, so holding the lock and testing the
65 * MARK also asserts that the contained object is in a certain state.
66 *
 67 * This is used to build a two-stage register/unregister flow where objects
 68 * can remain in the xarray even though they are still in the process of
 69 * registering/unregistering.
70 *
71 * The xarray itself provides additional locking, and restartable iteration,
72 * which is also relied on.
73 *
74 * Locks should not be nested, with the exception of client_data, which is
75 * allowed to nest under the read side of the other two locks.
76 *
 77 * The devices_rwsem also protects the device name list; any change or
 78 * assignment of a device name must also hold the write side to guarantee
 79 * unique names.
80 */
81
0df91bb6
JG
82/*
83 * devices contains devices that have had their names assigned. The
84 * devices may not be registered. Users that care about the registration
85 * status need to call ib_device_try_get() on the device to ensure it is
86 * registered, and keep it registered, for the required duration.
87 *
88 */
89static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
921eab11 90static DECLARE_RWSEM(devices_rwsem);
0df91bb6
JG
91#define DEVICE_REGISTERED XA_MARK_1
92
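/*
 * Illustrative sketch only (do_something() is a hypothetical placeholder):
 * the typical read-side pattern used throughout this file pairs the rwsem
 * with the REGISTERED mark, e.g.:
 *
 *	down_read(&devices_rwsem);
 *	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED)
 *		do_something(device);
 *	up_read(&devices_rwsem);
 *
 * Holding devices_rwsem for read guarantees the mark, and therefore the
 * registration state, cannot change while iterating.
 */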
1da177e4 93static LIST_HEAD(client_list);
e59178d8
JG
94#define CLIENT_REGISTERED XA_MARK_1
95static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
921eab11 96static DECLARE_RWSEM(clients_rwsem);
1da177e4
LT
97
98/*
0df91bb6
JG
99 * If client_data is registered then the corresponding client must also still
100 * be registered.
101 */
102#define CLIENT_DATA_REGISTERED XA_MARK_1
103/*
104 * xarray has this behavior where it won't iterate over NULL values stored in
105 * allocated arrays. So we need our own iterator to see all values stored in
106 * the array. This does the same thing as xa_for_each except that it also
107 * returns NULL valued entries if the array is allocating. Simplified to only
108 * work on simple xarrays.
109 */
110static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
111 xa_mark_t filter)
112{
113 XA_STATE(xas, xa, *indexp);
114 void *entry;
115
116 rcu_read_lock();
117 do {
118 entry = xas_find_marked(&xas, ULONG_MAX, filter);
119 if (xa_is_zero(entry))
120 break;
121 } while (xas_retry(&xas, entry));
122 rcu_read_unlock();
123
124 if (entry) {
125 *indexp = xas.xa_index;
126 if (xa_is_zero(entry))
127 return NULL;
128 return entry;
129 }
130 return XA_ERROR(-ENOENT);
131}
132#define xan_for_each_marked(xa, index, entry, filter) \
133 for (index = 0, entry = xan_find_marked(xa, &(index), filter); \
134 !xa_is_err(entry); \
135 (index)++, entry = xan_find_marked(xa, &(index), filter))
136
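/*
 * A hedged usage sketch mirroring ib_get_net_dev_by_params() below
 * (use_entry() is a hypothetical placeholder): walk every client_data slot,
 * including NULL values stored by clients:
 *
 *	unsigned long index;
 *	void *client_data;
 *
 *	xan_for_each_marked (&device->client_data, index, client_data,
 *			     CLIENT_DATA_REGISTERED)
 *		use_entry(index, client_data);
 */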
8f408ab6
DJ
137static int ib_security_change(struct notifier_block *nb, unsigned long event,
138 void *lsm_data);
139static void ib_policy_change_task(struct work_struct *work);
140static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task);
141
142static struct notifier_block ibdev_lsm_nb = {
143 .notifier_call = ib_security_change,
144};
1da177e4
LT
145
146static int ib_device_check_mandatory(struct ib_device *device)
147{
3023a1e9 148#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
1da177e4
LT
149 static const struct {
150 size_t offset;
151 char *name;
152 } mandatory_table[] = {
153 IB_MANDATORY_FUNC(query_device),
154 IB_MANDATORY_FUNC(query_port),
155 IB_MANDATORY_FUNC(query_pkey),
1da177e4
LT
156 IB_MANDATORY_FUNC(alloc_pd),
157 IB_MANDATORY_FUNC(dealloc_pd),
1da177e4
LT
158 IB_MANDATORY_FUNC(create_qp),
159 IB_MANDATORY_FUNC(modify_qp),
160 IB_MANDATORY_FUNC(destroy_qp),
161 IB_MANDATORY_FUNC(post_send),
162 IB_MANDATORY_FUNC(post_recv),
163 IB_MANDATORY_FUNC(create_cq),
164 IB_MANDATORY_FUNC(destroy_cq),
165 IB_MANDATORY_FUNC(poll_cq),
166 IB_MANDATORY_FUNC(req_notify_cq),
167 IB_MANDATORY_FUNC(get_dma_mr),
7738613e
IW
168 IB_MANDATORY_FUNC(dereg_mr),
169 IB_MANDATORY_FUNC(get_port_immutable)
1da177e4
LT
170 };
171 int i;
172
6780c4fa 173 device->kverbs_provider = true;
9a6b090c 174 for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
3023a1e9
KH
175 if (!*(void **) ((void *) &device->ops +
176 mandatory_table[i].offset)) {
6780c4fa
GP
177 device->kverbs_provider = false;
178 break;
1da177e4
LT
179 }
180 }
181
182 return 0;
183}
184
f8978bd9 185/*
01b67117
PP
 186 * Caller must perform ib_device_put() to release the device reference count
 187 * when ib_device_get_by_index() returns a valid device pointer.
f8978bd9
LR
188 */
189struct ib_device *ib_device_get_by_index(u32 index)
190{
191 struct ib_device *device;
192
921eab11 193 down_read(&devices_rwsem);
0df91bb6 194 device = xa_load(&devices, index);
01b67117 195 if (device) {
d79af724 196 if (!ib_device_try_get(device))
01b67117
PP
197 device = NULL;
198 }
921eab11 199 up_read(&devices_rwsem);
f8978bd9
LR
200 return device;
201}
202
d79af724
JG
203/**
204 * ib_device_put - Release IB device reference
205 * @device: device whose reference to be released
206 *
 207 * ib_device_put() releases a reference to the IB device to allow it to be
 208 * unregistered and eventually freed.
209 */
01b67117
PP
210void ib_device_put(struct ib_device *device)
211{
212 if (refcount_dec_and_test(&device->refcount))
213 complete(&device->unreg_completion);
214}
d79af724 215EXPORT_SYMBOL(ib_device_put);
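/*
 * A minimal sketch of the expected get/put pairing for in-kernel callers
 * (error handling shown is illustrative):
 *
 *	struct ib_device *dev = ib_device_get_by_index(index);
 *
 *	if (!dev)
 *		return -ENODEV;
 *	... use dev; unregistration waits in disable_device() until the
 *	    reference is dropped ...
 *	ib_device_put(dev);
 */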
01b67117 216
1da177e4
LT
217static struct ib_device *__ib_device_get_by_name(const char *name)
218{
219 struct ib_device *device;
0df91bb6 220 unsigned long index;
1da177e4 221
0df91bb6 222 xa_for_each (&devices, index, device)
896de009 223 if (!strcmp(name, dev_name(&device->dev)))
1da177e4
LT
224 return device;
225
226 return NULL;
227}
228
d21943dd
LR
229int ib_device_rename(struct ib_device *ibdev, const char *name)
230{
e3593b56 231 int ret;
d21943dd 232
921eab11 233 down_write(&devices_rwsem);
e3593b56
JG
234 if (!strcmp(name, dev_name(&ibdev->dev))) {
235 ret = 0;
236 goto out;
237 }
238
344684e6
JG
239 if (__ib_device_get_by_name(name)) {
240 ret = -EEXIST;
241 goto out;
d21943dd
LR
242 }
243
244 ret = device_rename(&ibdev->dev, name);
245 if (ret)
246 goto out;
247 strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
248out:
921eab11 249 up_write(&devices_rwsem);
d21943dd
LR
250 return ret;
251}
252
e349f858 253static int alloc_name(struct ib_device *ibdev, const char *name)
1da177e4 254{
1da177e4 255 struct ib_device *device;
0df91bb6 256 unsigned long index;
3b88afd3
JG
257 struct ida inuse;
258 int rc;
1da177e4
LT
259 int i;
260
921eab11 261 lockdep_assert_held_exclusive(&devices_rwsem);
3b88afd3 262 ida_init(&inuse);
0df91bb6 263 xa_for_each (&devices, index, device) {
e349f858
JG
264 char buf[IB_DEVICE_NAME_MAX];
265
896de009 266 if (sscanf(dev_name(&device->dev), name, &i) != 1)
1da177e4 267 continue;
3b88afd3 268 if (i < 0 || i >= INT_MAX)
1da177e4
LT
269 continue;
270 snprintf(buf, sizeof buf, name, i);
3b88afd3
JG
271 if (strcmp(buf, dev_name(&device->dev)) != 0)
272 continue;
273
274 rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL);
275 if (rc < 0)
276 goto out;
1da177e4
LT
277 }
278
3b88afd3
JG
279 rc = ida_alloc(&inuse, GFP_KERNEL);
280 if (rc < 0)
281 goto out;
1da177e4 282
3b88afd3
JG
283 rc = dev_set_name(&ibdev->dev, name, rc);
284out:
285 ida_destroy(&inuse);
286 return rc;
1da177e4
LT
287}
288
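/*
 * alloc_name() resolves printf-style name patterns by picking the lowest
 * unused index. A hedged example (the "foo%d" pattern is hypothetical): a
 * driver registering with
 *
 *	ret = ib_register_device(device, "foo%d");
 *
 * gets "foo0" if it is free, otherwise "foo1", and so on.
 */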
55aeed06
JG
289static void ib_device_release(struct device *device)
290{
291 struct ib_device *dev = container_of(device, struct ib_device, dev);
292
652432f3 293 WARN_ON(refcount_read(&dev->refcount));
d45f89d5 294 ib_cache_release_one(dev);
b34b269a
JG
295 ib_security_release_port_pkey_list(dev);
296 kfree(dev->port_pkey_list);
d45f89d5 297 kfree(dev->port_immutable);
0df91bb6 298 xa_destroy(&dev->client_data);
55aeed06
JG
299 kfree(dev);
300}
301
302static int ib_device_uevent(struct device *device,
303 struct kobj_uevent_env *env)
304{
896de009 305 if (add_uevent_var(env, "NAME=%s", dev_name(device)))
55aeed06
JG
306 return -ENOMEM;
307
308 /*
309 * It would be nice to pass the node GUID with the event...
310 */
311
312 return 0;
313}
314
315static struct class ib_class = {
316 .name = "infiniband",
317 .dev_release = ib_device_release,
318 .dev_uevent = ib_device_uevent,
319};
320
1da177e4 321/**
459cc69f 322 * _ib_alloc_device - allocate an IB device struct
1da177e4
LT
323 * @size:size of structure to allocate
324 *
325 * Low-level drivers should use ib_alloc_device() to allocate &struct
326 * ib_device. @size is the size of the structure to be allocated,
327 * including any private data used by the low-level driver.
328 * ib_dealloc_device() must be used to free structures allocated with
329 * ib_alloc_device().
330 */
459cc69f 331struct ib_device *_ib_alloc_device(size_t size)
1da177e4 332{
55aeed06
JG
333 struct ib_device *device;
334
335 if (WARN_ON(size < sizeof(struct ib_device)))
336 return NULL;
337
338 device = kzalloc(size, GFP_KERNEL);
339 if (!device)
340 return NULL;
341
41eda65c
LR
342 if (rdma_restrack_init(device)) {
343 kfree(device);
344 return NULL;
345 }
02d8883f 346
55aeed06 347 device->dev.class = &ib_class;
5f8f5499
PP
348 device->groups[0] = &ib_dev_attr_group;
349 device->dev.groups = device->groups;
55aeed06
JG
350 device_initialize(&device->dev);
351
55aeed06
JG
352 INIT_LIST_HEAD(&device->event_handler_list);
353 spin_lock_init(&device->event_handler_lock);
0df91bb6
JG
354 /*
 355 * client_data needs to be an allocating xarray because we don't want our
 356 * mark to be destroyed if the user stores NULL in the client data.
357 */
358 xa_init_flags(&device->client_data, XA_FLAGS_ALLOC);
921eab11 359 init_rwsem(&device->client_data_rwsem);
55aeed06 360 INIT_LIST_HEAD(&device->port_list);
01b67117 361 init_completion(&device->unreg_completion);
1da177e4 362
55aeed06 363 return device;
1da177e4 364}
459cc69f 365EXPORT_SYMBOL(_ib_alloc_device);
1da177e4
LT
366
367/**
368 * ib_dealloc_device - free an IB device struct
369 * @device:structure to free
370 *
371 * Free a structure allocated with ib_alloc_device().
372 */
373void ib_dealloc_device(struct ib_device *device)
374{
0df91bb6 375 WARN_ON(!xa_empty(&device->client_data));
652432f3 376 WARN_ON(refcount_read(&device->refcount));
0ad699c0 377 rdma_restrack_clean(device);
e155755e 378 /* Balances with device_initialize */
924b8900 379 put_device(&device->dev);
1da177e4
LT
380}
381EXPORT_SYMBOL(ib_dealloc_device);
382
921eab11
JG
383/*
384 * add_client_context() and remove_client_context() must be safe against
385 * parallel calls on the same device - registration/unregistration of both the
386 * device and client can be occurring in parallel.
387 *
 388 * The routines need to be a fence; any caller must not return until the add
389 * or remove is fully completed.
390 */
391static int add_client_context(struct ib_device *device,
392 struct ib_client *client)
1da177e4 393{
921eab11 394 int ret = 0;
1da177e4 395
6780c4fa 396 if (!device->kverbs_provider && !client->no_kverbs_req)
921eab11
JG
397 return 0;
398
399 down_write(&device->client_data_rwsem);
400 /*
401 * Another caller to add_client_context got here first and has already
402 * completely initialized context.
403 */
404 if (xa_get_mark(&device->client_data, client->client_id,
405 CLIENT_DATA_REGISTERED))
406 goto out;
407
408 ret = xa_err(xa_store(&device->client_data, client->client_id, NULL,
409 GFP_KERNEL));
410 if (ret)
411 goto out;
412 downgrade_write(&device->client_data_rwsem);
413 if (client->add)
414 client->add(device);
415
416 /* Readers shall not see a client until add has been completed */
417 xa_set_mark(&device->client_data, client->client_id,
418 CLIENT_DATA_REGISTERED);
419 up_read(&device->client_data_rwsem);
420 return 0;
421
422out:
423 up_write(&device->client_data_rwsem);
424 return ret;
425}
426
427static void remove_client_context(struct ib_device *device,
428 unsigned int client_id)
429{
430 struct ib_client *client;
431 void *client_data;
6780c4fa 432
921eab11
JG
433 down_write(&device->client_data_rwsem);
434 if (!xa_get_mark(&device->client_data, client_id,
435 CLIENT_DATA_REGISTERED)) {
436 up_write(&device->client_data_rwsem);
437 return;
438 }
439 client_data = xa_load(&device->client_data, client_id);
440 xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
441 client = xa_load(&clients, client_id);
442 downgrade_write(&device->client_data_rwsem);
1da177e4 443
921eab11
JG
444 /*
445 * Notice we cannot be holding any exclusive locks when calling the
446 * remove callback as the remove callback can recurse back into any
447 * public functions in this module and thus try for any locks those
448 * functions take.
449 *
450 * For this reason clients and drivers should not call the
 451 * unregistration functions while holding any locks.
452 *
 453 * It is tempting to drop the client_data_rwsem too, but this is required
454 * to ensure that unregister_client does not return until all clients
455 * are completely unregistered, which is required to avoid module
456 * unloading races.
457 */
458 if (client->remove)
459 client->remove(device, client_data);
460
461 xa_erase(&device->client_data, client_id);
462 up_read(&device->client_data_rwsem);
1da177e4
LT
463}
464
337877a4
IW
465static int verify_immutable(const struct ib_device *dev, u8 port)
466{
467 return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
468 rdma_max_mad_size(dev, port) != 0);
469}
470
7738613e 471static int read_port_immutable(struct ib_device *device)
5eb620c8 472{
ea1075ed 473 unsigned int port;
55aeed06 474 int ret;
7738613e
IW
475
476 /**
477 * device->port_immutable is indexed directly by the port number to make
478 * access to this data as efficient as possible.
479 *
 480 * Therefore port_immutable is declared as a 1-based array with
481 * potential empty slots at the beginning.
482 */
ea1075ed
JG
483 device->port_immutable =
484 kcalloc(rdma_end_port(device) + 1,
485 sizeof(*device->port_immutable), GFP_KERNEL);
7738613e 486 if (!device->port_immutable)
55aeed06 487 return -ENOMEM;
5eb620c8 488
ea1075ed 489 rdma_for_each_port (device, port) {
3023a1e9
KH
490 ret = device->ops.get_port_immutable(
491 device, port, &device->port_immutable[port]);
5eb620c8 492 if (ret)
55aeed06 493 return ret;
337877a4 494
55aeed06
JG
495 if (verify_immutable(device, port))
496 return -EINVAL;
5eb620c8 497 }
55aeed06 498 return 0;
5eb620c8
YE
499}
500
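/*
 * rdma_for_each_port(), added and used by this rework, iterates the 1-based
 * port numbers from rdma_start_port() to rdma_end_port() inclusive, replacing
 * the open-coded start/end loops. A hedged usage sketch:
 *
 *	unsigned int port;
 *
 *	rdma_for_each_port (device, port)
 *		pr_debug("port %u is %sIB\n", port,
 *			 rdma_protocol_ib(device, port) ? "" : "non-");
 */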
9abb0d1b 501void ib_get_device_fw_str(struct ib_device *dev, char *str)
5fa76c20 502{
3023a1e9
KH
503 if (dev->ops.get_dev_fw_str)
504 dev->ops.get_dev_fw_str(dev, str);
5fa76c20
IW
505 else
506 str[0] = '\0';
507}
508EXPORT_SYMBOL(ib_get_device_fw_str);
509
d291f1a6
DJ
510static int setup_port_pkey_list(struct ib_device *device)
511{
512 int i;
513
514 /**
 515 * device->port_pkey_list is indexed directly by the port number.
 516 * Therefore it is declared as a 1-based array with potential empty
 517 * slots at the beginning.
518 */
519 device->port_pkey_list = kcalloc(rdma_end_port(device) + 1,
520 sizeof(*device->port_pkey_list),
521 GFP_KERNEL);
522
523 if (!device->port_pkey_list)
524 return -ENOMEM;
525
526 for (i = 0; i < (rdma_end_port(device) + 1); i++) {
527 spin_lock_init(&device->port_pkey_list[i].list_lock);
528 INIT_LIST_HEAD(&device->port_pkey_list[i].pkey_list);
529 }
530
531 return 0;
532}
533
8f408ab6
DJ
534static void ib_policy_change_task(struct work_struct *work)
535{
536 struct ib_device *dev;
0df91bb6 537 unsigned long index;
8f408ab6 538
921eab11 539 down_read(&devices_rwsem);
0df91bb6 540 xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
ea1075ed 541 unsigned int i;
8f408ab6 542
ea1075ed 543 rdma_for_each_port (dev, i) {
8f408ab6
DJ
544 u64 sp;
545 int ret = ib_get_cached_subnet_prefix(dev,
546 i,
547 &sp);
548
549 WARN_ONCE(ret,
550 "ib_get_cached_subnet_prefix err: %d, this should never happen here\n",
551 ret);
a750cfde
DJ
552 if (!ret)
553 ib_security_cache_change(dev, i, sp);
8f408ab6
DJ
554 }
555 }
921eab11 556 up_read(&devices_rwsem);
8f408ab6
DJ
557}
558
559static int ib_security_change(struct notifier_block *nb, unsigned long event,
560 void *lsm_data)
561{
562 if (event != LSM_POLICY_CHANGE)
563 return NOTIFY_DONE;
564
565 schedule_work(&ib_policy_change_work);
c66f6741 566 ib_mad_agent_security_change();
8f408ab6
DJ
567
568 return NOTIFY_OK;
569}
570
0df91bb6
JG
571/*
572 * Assign the unique string device name and the unique device index.
ecc82c53 573 */
0df91bb6 574static int assign_name(struct ib_device *device, const char *name)
ecc82c53 575{
0df91bb6
JG
576 static u32 last_id;
577 int ret;
ecc82c53 578
921eab11 579 down_write(&devices_rwsem);
0df91bb6
JG
580 /* Assign a unique name to the device */
581 if (strchr(name, '%'))
582 ret = alloc_name(device, name);
583 else
584 ret = dev_set_name(&device->dev, name);
585 if (ret)
586 goto out;
587
588 if (__ib_device_get_by_name(dev_name(&device->dev))) {
589 ret = -ENFILE;
590 goto out;
591 }
592 strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
ecc82c53 593
0df91bb6
JG
594 /* Cyclically allocate a user visible ID for the device */
595 device->index = last_id;
596 ret = xa_alloc(&devices, &device->index, INT_MAX, device, GFP_KERNEL);
597 if (ret == -ENOSPC) {
598 device->index = 0;
599 ret = xa_alloc(&devices, &device->index, INT_MAX, device,
600 GFP_KERNEL);
ecc82c53 601 }
0df91bb6
JG
602 if (ret)
603 goto out;
604 last_id = device->index + 1;
605
606 ret = 0;
921eab11 607
0df91bb6 608out:
921eab11 609 up_write(&devices_rwsem);
0df91bb6
JG
610 return ret;
611}
612
613static void release_name(struct ib_device *device)
614{
921eab11 615 down_write(&devices_rwsem);
0df91bb6 616 xa_erase(&devices, device->index);
921eab11 617 up_write(&devices_rwsem);
ecc82c53
LR
618}
619
548cb4fb 620static void setup_dma_device(struct ib_device *device)
1da177e4 621{
99db9494
BVA
622 struct device *parent = device->dev.parent;
623
0957c29f
BVA
624 WARN_ON_ONCE(device->dma_device);
625 if (device->dev.dma_ops) {
626 /*
627 * The caller provided custom DMA operations. Copy the
628 * DMA-related fields that are used by e.g. dma_alloc_coherent()
629 * into device->dev.
630 */
631 device->dma_device = &device->dev;
02ee9da3
BVA
632 if (!device->dev.dma_mask) {
633 if (parent)
634 device->dev.dma_mask = parent->dma_mask;
635 else
636 WARN_ON_ONCE(true);
637 }
638 if (!device->dev.coherent_dma_mask) {
639 if (parent)
640 device->dev.coherent_dma_mask =
641 parent->coherent_dma_mask;
642 else
643 WARN_ON_ONCE(true);
644 }
0957c29f
BVA
645 } else {
646 /*
647 * The caller did not provide custom DMA operations. Use the
648 * DMA mapping operations of the parent device.
649 */
02ee9da3 650 WARN_ON_ONCE(!parent);
0957c29f
BVA
651 device->dma_device = parent;
652 }
548cb4fb 653}
1da177e4 654
921eab11
JG
655/*
656 * setup_device() allocates memory and sets up data that requires calling the
 657 * device ops; this is the only reason these actions are not done during
658 * ib_alloc_device. It is undone by ib_dealloc_device().
659 */
548cb4fb
PP
660static int setup_device(struct ib_device *device)
661{
662 struct ib_udata uhw = {.outlen = 0, .inlen = 0};
663 int ret;
1da177e4 664
921eab11
JG
665 setup_dma_device(device);
666
548cb4fb
PP
667 ret = ib_device_check_mandatory(device);
668 if (ret)
669 return ret;
1da177e4 670
7738613e 671 ret = read_port_immutable(device);
5eb620c8 672 if (ret) {
43c7c851
JG
673 dev_warn(&device->dev,
674 "Couldn't create per port immutable data\n");
548cb4fb
PP
675 return ret;
676 }
677
678 memset(&device->attrs, 0, sizeof(device->attrs));
3023a1e9 679 ret = device->ops.query_device(device, &device->attrs, &uhw);
548cb4fb
PP
680 if (ret) {
681 dev_warn(&device->dev,
682 "Couldn't query the device attributes\n");
d45f89d5 683 return ret;
5eb620c8
YE
684 }
685
d291f1a6
DJ
686 ret = setup_port_pkey_list(device);
687 if (ret) {
43c7c851 688 dev_warn(&device->dev, "Couldn't create per port_pkey_list\n");
b34b269a 689 return ret;
03db3a2d 690 }
548cb4fb 691
d45f89d5 692 return 0;
548cb4fb
PP
693}
694
921eab11
JG
695static void disable_device(struct ib_device *device)
696{
697 struct ib_client *client;
698
699 WARN_ON(!refcount_read(&device->refcount));
700
701 down_write(&devices_rwsem);
702 xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
703 up_write(&devices_rwsem);
704
705 down_read(&clients_rwsem);
706 list_for_each_entry_reverse(client, &client_list, list)
707 remove_client_context(device, client->client_id);
708 up_read(&clients_rwsem);
709
710 /* Pairs with refcount_set in enable_device */
711 ib_device_put(device);
712 wait_for_completion(&device->unreg_completion);
713}
714
715/*
716 * An enabled device is visible to all clients and to all the public facing
717 * APIs that return a device pointer.
718 */
719static int enable_device(struct ib_device *device)
720{
721 struct ib_client *client;
722 unsigned long index;
723 int ret;
724
725 refcount_set(&device->refcount, 1);
726 down_write(&devices_rwsem);
727 xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
728 up_write(&devices_rwsem);
729
730 down_read(&clients_rwsem);
731 xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
732 ret = add_client_context(device, client);
733 if (ret) {
734 up_read(&clients_rwsem);
735 disable_device(device);
736 return ret;
737 }
738 }
739 up_read(&clients_rwsem);
740 return 0;
741}
742
548cb4fb
PP
743/**
744 * ib_register_device - Register an IB device with IB core
 745 * @device:Device to register
 * @name:unique string device name; a '%' in the name requests automatic
 *   index assignment (see alloc_name())
746 *
747 * Low-level drivers use ib_register_device() to register their
748 * devices with the IB core. All registered clients will receive a
749 * callback for each device that is added. @device must be allocated
750 * with ib_alloc_device().
751 */
ea4baf7f 752int ib_register_device(struct ib_device *device, const char *name)
548cb4fb
PP
753{
754 int ret;
548cb4fb 755
0df91bb6
JG
756 ret = assign_name(device, name);
757 if (ret)
921eab11 758 return ret;
548cb4fb
PP
759
760 ret = setup_device(device);
761 if (ret)
921eab11 762 goto out;
03db3a2d 763
d45f89d5
JG
764 ret = ib_cache_setup_one(device);
765 if (ret) {
766 dev_warn(&device->dev,
767 "Couldn't set up InfiniBand P_Key/GID cache\n");
921eab11 768 goto out;
d45f89d5
JG
769 }
770
7527a7b1 771 ib_device_register_rdmacg(device);
3e153a93 772
5f8f5499
PP
773 ret = device_add(&device->dev);
774 if (ret)
775 goto cg_cleanup;
776
ea4baf7f 777 ret = ib_device_register_sysfs(device);
1da177e4 778 if (ret) {
43c7c851
JG
779 dev_warn(&device->dev,
780 "Couldn't register device with driver model\n");
5f8f5499 781 goto dev_cleanup;
1da177e4
LT
782 }
783
921eab11
JG
784 ret = enable_device(device);
785 if (ret)
786 goto sysfs_cleanup;
1da177e4 787
4be3a4fa
PP
788 return 0;
789
921eab11
JG
790sysfs_cleanup:
791 ib_device_unregister_sysfs(device);
5f8f5499
PP
792dev_cleanup:
793 device_del(&device->dev);
2fb4f4ea
PP
794cg_cleanup:
795 ib_device_unregister_rdmacg(device);
d45f89d5 796 ib_cache_cleanup_one(device);
5aa44bb9 797out:
921eab11 798 release_name(device);
1da177e4
LT
799 return ret;
800}
801EXPORT_SYMBOL(ib_register_device);
802
803/**
804 * ib_unregister_device - Unregister an IB device
805 * @device:Device to unregister
806 *
807 * Unregister an IB device. All clients will receive a remove callback.
808 */
809void ib_unregister_device(struct ib_device *device)
810{
921eab11 811 disable_device(device);
9206dff1 812 ib_device_unregister_sysfs(device);
5f8f5499 813 device_del(&device->dev);
c715a395 814 ib_device_unregister_rdmacg(device);
03db3a2d 815 ib_cache_cleanup_one(device);
921eab11 816 release_name(device);
1da177e4
LT
817}
818EXPORT_SYMBOL(ib_unregister_device);
819
e59178d8
JG
820static int assign_client_id(struct ib_client *client)
821{
822 int ret;
823
921eab11 824 down_write(&clients_rwsem);
e59178d8
JG
825 /*
826 * The add/remove callbacks must be called in FIFO/LIFO order. To
827 * achieve this we assign client_ids so they are sorted in
828 * registration order, and retain a linked list we can reverse iterate
829 * to get the LIFO order. The extra linked list can go away if xarray
830 * learns to reverse iterate.
831 */
832 if (list_empty(&client_list))
833 client->client_id = 0;
834 else
835 client->client_id =
836 list_last_entry(&client_list, struct ib_client, list)
837 ->client_id;
838 ret = xa_alloc(&clients, &client->client_id, INT_MAX, client,
839 GFP_KERNEL);
840 if (ret)
841 goto out;
842
921eab11
JG
843 xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
844 list_add_tail(&client->list, &client_list);
845
e59178d8 846out:
921eab11 847 up_write(&clients_rwsem);
e59178d8
JG
848 return ret;
849}
850
1da177e4
LT
851/**
852 * ib_register_client - Register an IB client
853 * @client:Client to register
854 *
855 * Upper level users of the IB drivers can use ib_register_client() to
856 * register callbacks for IB device addition and removal. When an IB
857 * device is added, each registered client's add method will be called
858 * (in the order the clients were registered), and when a device is
859 * removed, each client's remove method will be called (in the reverse
860 * order that clients were registered). In addition, when
861 * ib_register_client() is called, the client will receive an add
862 * callback for all devices already registered.
863 */
864int ib_register_client(struct ib_client *client)
865{
866 struct ib_device *device;
0df91bb6 867 unsigned long index;
e59178d8 868 int ret;
1da177e4 869
e59178d8 870 ret = assign_client_id(client);
921eab11 871 if (ret)
e59178d8 872 return ret;
1da177e4 873
921eab11
JG
874 down_read(&devices_rwsem);
875 xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
876 ret = add_client_context(device, client);
877 if (ret) {
878 up_read(&devices_rwsem);
879 ib_unregister_client(client);
880 return ret;
881 }
882 }
883 up_read(&devices_rwsem);
1da177e4
LT
884 return 0;
885}
886EXPORT_SYMBOL(ib_register_client);
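/*
 * A minimal client registration sketch; my_client, my_add_one() and
 * my_remove_one() are hypothetical names, not part of this file:
 *
 *	static void my_add_one(struct ib_device *device)
 *	{
 *	}
 *
 *	static void my_remove_one(struct ib_device *device, void *client_data)
 *	{
 *	}
 *
 *	static struct ib_client my_client = {
 *		.name   = "my_client",
 *		.add    = my_add_one,
 *		.remove = my_remove_one,
 *	};
 *
 *	ret = ib_register_client(&my_client);
 */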
887
888/**
889 * ib_unregister_client - Unregister an IB client
890 * @client:Client to unregister
891 *
892 * Upper level users use ib_unregister_client() to remove their client
893 * registration. When ib_unregister_client() is called, the client
894 * will receive a remove callback for each IB device still registered.
921eab11
JG
895 *
 896 * This is a full fence; once it returns, no client callbacks will be called
897 * or are running in another thread.
1da177e4
LT
898 */
899void ib_unregister_client(struct ib_client *client)
900{
1da177e4 901 struct ib_device *device;
0df91bb6 902 unsigned long index;
1da177e4 903
921eab11 904 down_write(&clients_rwsem);
e59178d8 905 xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
921eab11
JG
906 up_write(&clients_rwsem);
907 /*
908 * Every device still known must be serialized to make sure we are
909 * done with the client callbacks before we return.
910 */
911 down_read(&devices_rwsem);
912 xa_for_each (&devices, index, device)
913 remove_client_context(device, client->client_id);
914 up_read(&devices_rwsem);
1da177e4 915
921eab11 916 down_write(&clients_rwsem);
e59178d8
JG
917 list_del(&client->list);
918 xa_erase(&clients, client->client_id);
921eab11 919 up_write(&clients_rwsem);
1da177e4
LT
920}
921EXPORT_SYMBOL(ib_unregister_client);
922
1da177e4 923/**
9cd330d3 924 * ib_set_client_data - Set IB client context
1da177e4
LT
925 * @device:Device to set context for
926 * @client:Client to set context for
927 * @data:Context to set
928 *
0df91bb6
JG
929 * ib_set_client_data() sets client context data that can be retrieved with
930 * ib_get_client_data(). This can only be called while the client is
 931 * registered to the device; once the ib_client remove() callback returns,
 932 * this cannot be called.
1da177e4
LT
933 */
934void ib_set_client_data(struct ib_device *device, struct ib_client *client,
935 void *data)
936{
0df91bb6 937 void *rc;
1da177e4 938
0df91bb6
JG
939 if (WARN_ON(IS_ERR(data)))
940 data = NULL;
1da177e4 941
0df91bb6
JG
942 rc = xa_store(&device->client_data, client->client_id, data,
943 GFP_KERNEL);
944 WARN_ON(xa_is_err(rc));
1da177e4
LT
945}
946EXPORT_SYMBOL(ib_set_client_data);
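/*
 * Sketch of the intended usage from a client's add() callback; my_ctx and
 * my_client are hypothetical:
 *
 *	static void my_add_one(struct ib_device *device)
 *	{
 *		struct my_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 *
 *		if (!ctx)
 *			return;
 *		ib_set_client_data(device, &my_client, ctx);
 *	}
 *
 * The stored pointer can later be fetched with ib_get_client_data() and is
 * handed back as the client_data argument of the remove() callback.
 */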
947
948/**
949 * ib_register_event_handler - Register an IB event handler
950 * @event_handler:Handler to register
951 *
952 * ib_register_event_handler() registers an event handler that will be
953 * called back when asynchronous IB events occur (as defined in
954 * chapter 11 of the InfiniBand Architecture Specification). This
955 * callback may occur in interrupt context.
956 */
dcc9881e 957void ib_register_event_handler(struct ib_event_handler *event_handler)
1da177e4
LT
958{
959 unsigned long flags;
960
961 spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
962 list_add_tail(&event_handler->list,
963 &event_handler->device->event_handler_list);
964 spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
1da177e4
LT
965}
966EXPORT_SYMBOL(ib_register_event_handler);
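/*
 * A hedged sketch (my_event_cb() is hypothetical): handlers are usually
 * initialized with the INIT_IB_EVENT_HANDLER() helper from <rdma/ib_verbs.h>
 * before being registered:
 *
 *	struct ib_event_handler handler;
 *
 *	INIT_IB_EVENT_HANDLER(&handler, device, my_event_cb);
 *	ib_register_event_handler(&handler);
 *
 * my_event_cb() may then run in interrupt context via ib_dispatch_event().
 */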
967
968/**
969 * ib_unregister_event_handler - Unregister an event handler
970 * @event_handler:Handler to unregister
971 *
972 * Unregister an event handler registered with
973 * ib_register_event_handler().
974 */
dcc9881e 975void ib_unregister_event_handler(struct ib_event_handler *event_handler)
1da177e4
LT
976{
977 unsigned long flags;
978
979 spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
980 list_del(&event_handler->list);
981 spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
1da177e4
LT
982}
983EXPORT_SYMBOL(ib_unregister_event_handler);
984
985/**
986 * ib_dispatch_event - Dispatch an asynchronous event
987 * @event:Event to dispatch
988 *
989 * Low-level drivers must call ib_dispatch_event() to dispatch the
990 * event to all registered event handlers when an asynchronous event
991 * occurs.
992 */
993void ib_dispatch_event(struct ib_event *event)
994{
995 unsigned long flags;
996 struct ib_event_handler *handler;
997
998 spin_lock_irqsave(&event->device->event_handler_lock, flags);
999
1000 list_for_each_entry(handler, &event->device->event_handler_list, list)
1001 handler->handler(handler, event);
1002
1003 spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
1004}
1005EXPORT_SYMBOL(ib_dispatch_event);
1006
1da177e4
LT
1007/**
1008 * ib_query_port - Query IB port attributes
1009 * @device:Device to query
1010 * @port_num:Port number to query
1011 * @port_attr:Port attributes
1012 *
1013 * ib_query_port() returns the attributes of a port through the
1014 * @port_attr pointer.
1015 */
1016int ib_query_port(struct ib_device *device,
1017 u8 port_num,
1018 struct ib_port_attr *port_attr)
1019{
fad61ad4
EC
1020 union ib_gid gid;
1021 int err;
1022
24dc831b 1023 if (!rdma_is_port_valid(device, port_num))
116c0074
RD
1024 return -EINVAL;
1025
fad61ad4 1026 memset(port_attr, 0, sizeof(*port_attr));
3023a1e9 1027 err = device->ops.query_port(device, port_num, port_attr);
fad61ad4
EC
1028 if (err || port_attr->subnet_prefix)
1029 return err;
1030
d7012467
EC
1031 if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
1032 return 0;
1033
3023a1e9 1034 err = device->ops.query_gid(device, port_num, 0, &gid);
fad61ad4
EC
1035 if (err)
1036 return err;
1037
1038 port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
1039 return 0;
1da177e4
LT
1040}
1041EXPORT_SYMBOL(ib_query_port);
1042
03db3a2d
MB
1043/**
1044 * ib_enum_roce_netdev - enumerate all RoCE ports
 1045 * @ib_dev: IB device we want to query
 1046 * @filter: Should we call the callback?
 1047 * @filter_cookie: Cookie passed to filter
 1048 * @cb: Callback to call for each RoCE port found
 1049 * @cookie: Cookie passed back to the callback
 1050 *
 1051 * Enumerates all of the physical RoCE ports of ib_dev
 1052 * which are related to a netdevice and calls the callback() on each
 1053 * port for which the filter() function returns non zero.
1054 */
1055void ib_enum_roce_netdev(struct ib_device *ib_dev,
1056 roce_netdev_filter filter,
1057 void *filter_cookie,
1058 roce_netdev_callback cb,
1059 void *cookie)
1060{
ea1075ed 1061 unsigned int port;
03db3a2d 1062
ea1075ed 1063 rdma_for_each_port (ib_dev, port)
03db3a2d
MB
1064 if (rdma_protocol_roce(ib_dev, port)) {
1065 struct net_device *idev = NULL;
1066
3023a1e9
KH
1067 if (ib_dev->ops.get_netdev)
1068 idev = ib_dev->ops.get_netdev(ib_dev, port);
03db3a2d
MB
1069
1070 if (idev &&
1071 idev->reg_state >= NETREG_UNREGISTERED) {
1072 dev_put(idev);
1073 idev = NULL;
1074 }
1075
1076 if (filter(ib_dev, port, idev, filter_cookie))
1077 cb(ib_dev, port, idev, cookie);
1078
1079 if (idev)
1080 dev_put(idev);
1081 }
1082}
1083
1084/**
1085 * ib_enum_all_roce_netdevs - enumerate all RoCE devices
1086 * @filter: Should we call the callback?
1087 * @filter_cookie: Cookie passed to filter
 1088 * @cb: Callback to call for each RoCE port found
 1089 * @cookie: Cookie passed back to the callback
 1090 *
 1091 * Enumerates all RoCE devices' physical ports which are related
 1092 * to netdevices and calls the callback() on each port for which
 1093 * the filter() function returns non zero.
1094 */
1095void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
1096 void *filter_cookie,
1097 roce_netdev_callback cb,
1098 void *cookie)
1099{
1100 struct ib_device *dev;
0df91bb6 1101 unsigned long index;
03db3a2d 1102
921eab11 1103 down_read(&devices_rwsem);
0df91bb6 1104 xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
03db3a2d 1105 ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
921eab11 1106 up_read(&devices_rwsem);
8030c835
LR
1107}
1108
1109/**
1110 * ib_enum_all_devs - enumerate all ib_devices
1111 * @cb: Callback to call for each found ib_device
1112 *
1113 * Enumerates all ib_devices and calls callback() on each device.
1114 */
1115int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
1116 struct netlink_callback *cb)
1117{
0df91bb6 1118 unsigned long index;
8030c835
LR
1119 struct ib_device *dev;
1120 unsigned int idx = 0;
1121 int ret = 0;
1122
921eab11 1123 down_read(&devices_rwsem);
0df91bb6 1124 xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
8030c835
LR
1125 ret = nldev_cb(dev, skb, cb, idx);
1126 if (ret)
1127 break;
1128 idx++;
1129 }
921eab11 1130 up_read(&devices_rwsem);
8030c835 1131 return ret;
03db3a2d
MB
1132}
1133
1da177e4
LT
1134/**
1135 * ib_query_pkey - Get P_Key table entry
1136 * @device:Device to query
1137 * @port_num:Port number to query
1138 * @index:P_Key table index to query
1139 * @pkey:Returned P_Key
1140 *
1141 * ib_query_pkey() fetches the specified P_Key table entry.
1142 */
1143int ib_query_pkey(struct ib_device *device,
1144 u8 port_num, u16 index, u16 *pkey)
1145{
9af3f5cf
YS
1146 if (!rdma_is_port_valid(device, port_num))
1147 return -EINVAL;
1148
3023a1e9 1149 return device->ops.query_pkey(device, port_num, index, pkey);
1da177e4
LT
1150}
1151EXPORT_SYMBOL(ib_query_pkey);
1152
1153/**
1154 * ib_modify_device - Change IB device attributes
1155 * @device:Device to modify
1156 * @device_modify_mask:Mask of attributes to change
1157 * @device_modify:New attribute values
1158 *
1159 * ib_modify_device() changes a device's attributes as specified by
1160 * the @device_modify_mask and @device_modify structure.
1161 */
1162int ib_modify_device(struct ib_device *device,
1163 int device_modify_mask,
1164 struct ib_device_modify *device_modify)
1165{
3023a1e9 1166 if (!device->ops.modify_device)
10e1b54b
BVA
1167 return -ENOSYS;
1168
3023a1e9
KH
1169 return device->ops.modify_device(device, device_modify_mask,
1170 device_modify);
1da177e4
LT
1171}
1172EXPORT_SYMBOL(ib_modify_device);
1173
1174/**
1175 * ib_modify_port - Modifies the attributes for the specified port.
1176 * @device: The device to modify.
1177 * @port_num: The number of the port to modify.
1178 * @port_modify_mask: Mask used to specify which attributes of the port
1179 * to change.
1180 * @port_modify: New attribute values for the port.
1181 *
1182 * ib_modify_port() changes a port's attributes as specified by the
1183 * @port_modify_mask and @port_modify structure.
1184 */
1185int ib_modify_port(struct ib_device *device,
1186 u8 port_num, int port_modify_mask,
1187 struct ib_port_modify *port_modify)
1188{
61e0962d 1189 int rc;
10e1b54b 1190
24dc831b 1191 if (!rdma_is_port_valid(device, port_num))
116c0074
RD
1192 return -EINVAL;
1193
3023a1e9
KH
1194 if (device->ops.modify_port)
1195 rc = device->ops.modify_port(device, port_num,
1196 port_modify_mask,
1197 port_modify);
61e0962d
SX
1198 else
1199 rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS;
1200 return rc;
1da177e4
LT
1201}
1202EXPORT_SYMBOL(ib_modify_port);
1203
5eb620c8
YE
1204/**
1205 * ib_find_gid - Returns the port number and GID table index where
dbb12562 1206 * a specified GID value occurs. It searches only the IB link layer.
5eb620c8
YE
1207 * @device: The device to query.
1208 * @gid: The GID value to search for.
1209 * @port_num: The port number of the device where the GID value was found.
1210 * @index: The index into the GID table where the GID was found. This
1211 * parameter may be NULL.
1212 */
1213int ib_find_gid(struct ib_device *device, union ib_gid *gid,
b26c4a11 1214 u8 *port_num, u16 *index)
5eb620c8
YE
1215{
1216 union ib_gid tmp_gid;
ea1075ed
JG
1217 unsigned int port;
1218 int ret, i;
5eb620c8 1219
ea1075ed 1220 rdma_for_each_port (device, port) {
22d24f75 1221 if (!rdma_protocol_ib(device, port))
b39ffa1d
MB
1222 continue;
1223
7738613e 1224 for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
1dfce294 1225 ret = rdma_query_gid(device, port, i, &tmp_gid);
5eb620c8
YE
1226 if (ret)
1227 return ret;
1228 if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
1229 *port_num = port;
1230 if (index)
1231 *index = i;
1232 return 0;
1233 }
1234 }
1235 }
1236
1237 return -ENOENT;
1238}
1239EXPORT_SYMBOL(ib_find_gid);
1240
1241/**
1242 * ib_find_pkey - Returns the PKey table index where a specified
1243 * PKey value occurs.
1244 * @device: The device to query.
1245 * @port_num: The port number of the device to search for the PKey.
1246 * @pkey: The PKey value to search for.
1247 * @index: The index into the PKey table where the PKey was found.
1248 */
1249int ib_find_pkey(struct ib_device *device,
1250 u8 port_num, u16 pkey, u16 *index)
1251{
1252 int ret, i;
1253 u16 tmp_pkey;
ff7166c4 1254 int partial_ix = -1;
5eb620c8 1255
7738613e 1256 for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) {
5eb620c8
YE
1257 ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
1258 if (ret)
1259 return ret;
36026ecc 1260 if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
ff7166c4
JM
 1261 /* if there is a full-member pkey take it. */
1262 if (tmp_pkey & 0x8000) {
1263 *index = i;
1264 return 0;
1265 }
1266 if (partial_ix < 0)
1267 partial_ix = i;
5eb620c8
YE
1268 }
1269 }
1270
ff7166c4
JM
 1271 /* no full-member pkey; if a limited-member pkey exists take it */
1272 if (partial_ix >= 0) {
1273 *index = partial_ix;
1274 return 0;
1275 }
5eb620c8
YE
1276 return -ENOENT;
1277}
1278EXPORT_SYMBOL(ib_find_pkey);
1279
9268f72d
YK
1280/**
1281 * ib_get_net_dev_by_params() - Return the appropriate net_dev
1282 * for a received CM request
1283 * @dev: An RDMA device on which the request has been received.
1284 * @port: Port number on the RDMA device.
1285 * @pkey: The Pkey the request came on.
1286 * @gid: A GID that the net_dev uses to communicate.
1287 * @addr: Contains the IP address that the request specified as its
1288 * destination.
921eab11 1289 *
9268f72d
YK
1290 */
1291struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
1292 u8 port,
1293 u16 pkey,
1294 const union ib_gid *gid,
1295 const struct sockaddr *addr)
1296{
1297 struct net_device *net_dev = NULL;
0df91bb6
JG
1298 unsigned long index;
1299 void *client_data;
9268f72d
YK
1300
1301 if (!rdma_protocol_ib(dev, port))
1302 return NULL;
1303
921eab11
JG
1304 /*
1305 * Holding the read side guarantees that the client will not become
1306 * unregistered while we are calling get_net_dev_by_params()
1307 */
1308 down_read(&dev->client_data_rwsem);
0df91bb6
JG
1309 xan_for_each_marked (&dev->client_data, index, client_data,
1310 CLIENT_DATA_REGISTERED) {
1311 struct ib_client *client = xa_load(&clients, index);
9268f72d 1312
0df91bb6 1313 if (!client || !client->get_net_dev_by_params)
9268f72d
YK
1314 continue;
1315
0df91bb6
JG
1316 net_dev = client->get_net_dev_by_params(dev, port, pkey, gid,
1317 addr, client_data);
1318 if (net_dev)
1319 break;
9268f72d 1320 }
921eab11 1321 up_read(&dev->client_data_rwsem);
9268f72d
YK
1322
1323 return net_dev;
1324}
1325EXPORT_SYMBOL(ib_get_net_dev_by_params);
1326
521ed0d9
KH
1327void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
1328{
3023a1e9 1329 struct ib_device_ops *dev_ops = &dev->ops;
521ed0d9
KH
1330#define SET_DEVICE_OP(ptr, name) \
1331 do { \
1332 if (ops->name) \
1333 if (!((ptr)->name)) \
1334 (ptr)->name = ops->name; \
1335 } while (0)
1336
30471d4b
LR
1337#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name)
1338
3023a1e9 1339 SET_DEVICE_OP(dev_ops, add_gid);
2f1927b0 1340 SET_DEVICE_OP(dev_ops, advise_mr);
3023a1e9
KH
1341 SET_DEVICE_OP(dev_ops, alloc_dm);
1342 SET_DEVICE_OP(dev_ops, alloc_fmr);
1343 SET_DEVICE_OP(dev_ops, alloc_hw_stats);
1344 SET_DEVICE_OP(dev_ops, alloc_mr);
1345 SET_DEVICE_OP(dev_ops, alloc_mw);
1346 SET_DEVICE_OP(dev_ops, alloc_pd);
1347 SET_DEVICE_OP(dev_ops, alloc_rdma_netdev);
1348 SET_DEVICE_OP(dev_ops, alloc_ucontext);
1349 SET_DEVICE_OP(dev_ops, alloc_xrcd);
1350 SET_DEVICE_OP(dev_ops, attach_mcast);
1351 SET_DEVICE_OP(dev_ops, check_mr_status);
1352 SET_DEVICE_OP(dev_ops, create_ah);
1353 SET_DEVICE_OP(dev_ops, create_counters);
1354 SET_DEVICE_OP(dev_ops, create_cq);
1355 SET_DEVICE_OP(dev_ops, create_flow);
1356 SET_DEVICE_OP(dev_ops, create_flow_action_esp);
1357 SET_DEVICE_OP(dev_ops, create_qp);
1358 SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
1359 SET_DEVICE_OP(dev_ops, create_srq);
1360 SET_DEVICE_OP(dev_ops, create_wq);
1361 SET_DEVICE_OP(dev_ops, dealloc_dm);
1362 SET_DEVICE_OP(dev_ops, dealloc_fmr);
1363 SET_DEVICE_OP(dev_ops, dealloc_mw);
1364 SET_DEVICE_OP(dev_ops, dealloc_pd);
1365 SET_DEVICE_OP(dev_ops, dealloc_ucontext);
1366 SET_DEVICE_OP(dev_ops, dealloc_xrcd);
1367 SET_DEVICE_OP(dev_ops, del_gid);
1368 SET_DEVICE_OP(dev_ops, dereg_mr);
1369 SET_DEVICE_OP(dev_ops, destroy_ah);
1370 SET_DEVICE_OP(dev_ops, destroy_counters);
1371 SET_DEVICE_OP(dev_ops, destroy_cq);
1372 SET_DEVICE_OP(dev_ops, destroy_flow);
1373 SET_DEVICE_OP(dev_ops, destroy_flow_action);
1374 SET_DEVICE_OP(dev_ops, destroy_qp);
1375 SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
1376 SET_DEVICE_OP(dev_ops, destroy_srq);
1377 SET_DEVICE_OP(dev_ops, destroy_wq);
1378 SET_DEVICE_OP(dev_ops, detach_mcast);
1379 SET_DEVICE_OP(dev_ops, disassociate_ucontext);
1380 SET_DEVICE_OP(dev_ops, drain_rq);
1381 SET_DEVICE_OP(dev_ops, drain_sq);
02da3750 1382 SET_DEVICE_OP(dev_ops, fill_res_entry);
3023a1e9
KH
1383 SET_DEVICE_OP(dev_ops, get_dev_fw_str);
1384 SET_DEVICE_OP(dev_ops, get_dma_mr);
1385 SET_DEVICE_OP(dev_ops, get_hw_stats);
1386 SET_DEVICE_OP(dev_ops, get_link_layer);
1387 SET_DEVICE_OP(dev_ops, get_netdev);
1388 SET_DEVICE_OP(dev_ops, get_port_immutable);
1389 SET_DEVICE_OP(dev_ops, get_vector_affinity);
1390 SET_DEVICE_OP(dev_ops, get_vf_config);
1391 SET_DEVICE_OP(dev_ops, get_vf_stats);
ea4baf7f 1392 SET_DEVICE_OP(dev_ops, init_port);
3023a1e9
KH
1393 SET_DEVICE_OP(dev_ops, map_mr_sg);
1394 SET_DEVICE_OP(dev_ops, map_phys_fmr);
1395 SET_DEVICE_OP(dev_ops, mmap);
1396 SET_DEVICE_OP(dev_ops, modify_ah);
1397 SET_DEVICE_OP(dev_ops, modify_cq);
1398 SET_DEVICE_OP(dev_ops, modify_device);
1399 SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
1400 SET_DEVICE_OP(dev_ops, modify_port);
1401 SET_DEVICE_OP(dev_ops, modify_qp);
1402 SET_DEVICE_OP(dev_ops, modify_srq);
1403 SET_DEVICE_OP(dev_ops, modify_wq);
1404 SET_DEVICE_OP(dev_ops, peek_cq);
1405 SET_DEVICE_OP(dev_ops, poll_cq);
1406 SET_DEVICE_OP(dev_ops, post_recv);
1407 SET_DEVICE_OP(dev_ops, post_send);
1408 SET_DEVICE_OP(dev_ops, post_srq_recv);
1409 SET_DEVICE_OP(dev_ops, process_mad);
1410 SET_DEVICE_OP(dev_ops, query_ah);
1411 SET_DEVICE_OP(dev_ops, query_device);
1412 SET_DEVICE_OP(dev_ops, query_gid);
1413 SET_DEVICE_OP(dev_ops, query_pkey);
1414 SET_DEVICE_OP(dev_ops, query_port);
1415 SET_DEVICE_OP(dev_ops, query_qp);
1416 SET_DEVICE_OP(dev_ops, query_srq);
1417 SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
1418 SET_DEVICE_OP(dev_ops, read_counters);
1419 SET_DEVICE_OP(dev_ops, reg_dm_mr);
1420 SET_DEVICE_OP(dev_ops, reg_user_mr);
1421 SET_DEVICE_OP(dev_ops, req_ncomp_notif);
1422 SET_DEVICE_OP(dev_ops, req_notify_cq);
1423 SET_DEVICE_OP(dev_ops, rereg_user_mr);
1424 SET_DEVICE_OP(dev_ops, resize_cq);
1425 SET_DEVICE_OP(dev_ops, set_vf_guid);
1426 SET_DEVICE_OP(dev_ops, set_vf_link_state);
1427 SET_DEVICE_OP(dev_ops, unmap_fmr);
21a428a0
LR
1428
1429 SET_OBJ_SIZE(dev_ops, ib_pd);
521ed0d9
KH
1430}
1431EXPORT_SYMBOL(ib_set_device_ops);
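/*
 * Drivers typically feed ib_set_device_ops() a static const ops table; a
 * hedged sketch (the driver callbacks shown are hypothetical and only a
 * subset of the ops):
 *
 *	static const struct ib_device_ops my_dev_ops = {
 *		.query_device       = my_query_device,
 *		.query_port         = my_query_port,
 *		.get_port_immutable = my_get_port_immutable,
 *	};
 *
 *	ib_set_device_ops(ibdev, &my_dev_ops);
 *
 * SET_DEVICE_OP() only fills pointers that are still NULL, so ops already set
 * on the device are preserved.
 */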
1432
d0e312fe 1433static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
735c631a 1434 [RDMA_NL_LS_OP_RESOLVE] = {
647c75ac 1435 .doit = ib_nl_handle_resolve_resp,
e3a2b93d
LR
1436 .flags = RDMA_NL_ADMIN_PERM,
1437 },
735c631a 1438 [RDMA_NL_LS_OP_SET_TIMEOUT] = {
647c75ac 1439 .doit = ib_nl_handle_set_timeout,
e3a2b93d
LR
1440 .flags = RDMA_NL_ADMIN_PERM,
1441 },
ae43f828 1442 [RDMA_NL_LS_OP_IP_RESOLVE] = {
647c75ac 1443 .doit = ib_nl_handle_ip_res_resp,
e3a2b93d
LR
1444 .flags = RDMA_NL_ADMIN_PERM,
1445 },
735c631a
MB
1446};
1447
1da177e4
LT
1448static int __init ib_core_init(void)
1449{
1450 int ret;
1451
f0626710
TH
1452 ib_wq = alloc_workqueue("infiniband", 0, 0);
1453 if (!ib_wq)
1454 return -ENOMEM;
1455
14d3a3b2 1456 ib_comp_wq = alloc_workqueue("ib-comp-wq",
b7363e67 1457 WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
14d3a3b2
CH
1458 if (!ib_comp_wq) {
1459 ret = -ENOMEM;
1460 goto err;
1461 }
1462
f794809a
JM
1463 ib_comp_unbound_wq =
1464 alloc_workqueue("ib-comp-unb-wq",
1465 WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
1466 WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
1467 if (!ib_comp_unbound_wq) {
1468 ret = -ENOMEM;
1469 goto err_comp;
1470 }
1471
55aeed06 1472 ret = class_register(&ib_class);
fd75c789 1473 if (ret) {
aba25a3e 1474 pr_warn("Couldn't create InfiniBand device class\n");
f794809a 1475 goto err_comp_unbound;
fd75c789 1476 }
1da177e4 1477
c9901724 1478 ret = rdma_nl_init();
b2cbae2c 1479 if (ret) {
c9901724 1480 pr_warn("Couldn't init IB netlink interface: err %d\n", ret);
b2cbae2c
RD
1481 goto err_sysfs;
1482 }
1483
e3f20f02
LR
1484 ret = addr_init();
1485 if (ret) {
1486 pr_warn("Could't init IB address resolution\n");
1487 goto err_ibnl;
1488 }
1489
4c2cb422
MB
1490 ret = ib_mad_init();
1491 if (ret) {
1492 pr_warn("Couldn't init IB MAD\n");
1493 goto err_addr;
1494 }
1495
c2e49c92
MB
1496 ret = ib_sa_init();
1497 if (ret) {
1498 pr_warn("Couldn't init SA\n");
1499 goto err_mad;
1500 }
1501
8f408ab6
DJ
1502 ret = register_lsm_notifier(&ibdev_lsm_nb);
1503 if (ret) {
1504 pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
c9901724 1505 goto err_sa;
8f408ab6
DJ
1506 }
1507
6c80b41a 1508 nldev_init();
c9901724 1509 rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
5ef8c0c1 1510 roce_gid_mgmt_init();
1da177e4 1511
fd75c789
NM
1512 return 0;
1513
735c631a
MB
1514err_sa:
1515 ib_sa_cleanup();
c2e49c92
MB
1516err_mad:
1517 ib_mad_cleanup();
4c2cb422
MB
1518err_addr:
1519 addr_cleanup();
e3f20f02 1520err_ibnl:
c9901724 1521 rdma_nl_exit();
fd75c789 1522err_sysfs:
55aeed06 1523 class_unregister(&ib_class);
f794809a
JM
1524err_comp_unbound:
1525 destroy_workqueue(ib_comp_unbound_wq);
14d3a3b2
CH
1526err_comp:
1527 destroy_workqueue(ib_comp_wq);
fd75c789
NM
1528err:
1529 destroy_workqueue(ib_wq);
1da177e4
LT
1530 return ret;
1531}
1532
1533static void __exit ib_core_cleanup(void)
1534{
5ef8c0c1 1535 roce_gid_mgmt_cleanup();
6c80b41a 1536 nldev_exit();
c9901724
LR
1537 rdma_nl_unregister(RDMA_NL_LS);
1538 unregister_lsm_notifier(&ibdev_lsm_nb);
c2e49c92 1539 ib_sa_cleanup();
4c2cb422 1540 ib_mad_cleanup();
e3f20f02 1541 addr_cleanup();
c9901724 1542 rdma_nl_exit();
55aeed06 1543 class_unregister(&ib_class);
f794809a 1544 destroy_workqueue(ib_comp_unbound_wq);
14d3a3b2 1545 destroy_workqueue(ib_comp_wq);
f7c6a7b5 1546 /* Make sure that any pending umem accounting work is done. */
f0626710 1547 destroy_workqueue(ib_wq);
e59178d8 1548 WARN_ON(!xa_empty(&clients));
0df91bb6 1549 WARN_ON(!xa_empty(&devices));
1da177e4
LT
1550}
1551
e3bf14bd
JG
1552MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);
1553
a9cd1a67 1554subsys_initcall(ib_core_init);
1da177e4 1555module_exit(ib_core_cleanup);