IB/uverbs: Introduce RWQ Indirection table
[linux-2.6-block.git] / drivers / infiniband / core / uverbs_main.c
CommitLineData
bc38a6ab
RD
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
33b9b3ee 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
2a1d9b7f
RD
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
67cdb40c 6 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
bc38a6ab
RD
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
bc38a6ab
RD
35 */
36
37#include <linux/module.h>
38#include <linux/init.h>
39#include <linux/device.h>
40#include <linux/err.h>
41#include <linux/fs.h>
42#include <linux/poll.h>
a99bbaf5 43#include <linux/sched.h>
bc38a6ab 44#include <linux/file.h>
70a30e16 45#include <linux/cdev.h>
a265e558 46#include <linux/anon_inodes.h>
5a0e3ad6 47#include <linux/slab.h>
bc38a6ab
RD
48
49#include <asm/uaccess.h>
50
e6bd18f5
JG
51#include <rdma/ib.h>
52
bc38a6ab
RD
53#include "uverbs.h"
54
55MODULE_AUTHOR("Roland Dreier");
56MODULE_DESCRIPTION("InfiniBand userspace verbs access");
57MODULE_LICENSE("Dual BSD/GPL");
58
bc38a6ab
RD
/* Character-device numbering used by the uverbs nodes. */
enum {
	IB_UVERBS_MAJOR		= 231,	/* major part of IB_UVERBS_BASE_DEV */
	IB_UVERBS_BASE_MINOR	= 192,	/* minor part of IB_UVERBS_BASE_DEV */
	IB_UVERBS_MAX_DEVICES	= 32	/* number of bits in dev_map */
};

/* dev_t built from the major/base-minor pair above. */
#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
66
70a30e16
RD
67static struct class *uverbs_class;
68
9ead190b 69DEFINE_SPINLOCK(ib_uverbs_idr_lock);
bc38a6ab
RD
70DEFINE_IDR(ib_uverbs_pd_idr);
71DEFINE_IDR(ib_uverbs_mr_idr);
72DEFINE_IDR(ib_uverbs_mw_idr);
73DEFINE_IDR(ib_uverbs_ah_idr);
74DEFINE_IDR(ib_uverbs_cq_idr);
75DEFINE_IDR(ib_uverbs_qp_idr);
f520ba5a 76DEFINE_IDR(ib_uverbs_srq_idr);
53d0bd1e 77DEFINE_IDR(ib_uverbs_xrcd_idr);
436f2ad0 78DEFINE_IDR(ib_uverbs_rule_idr);
f213c052 79DEFINE_IDR(ib_uverbs_wq_idr);
de019a94 80DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
bc38a6ab 81
6276e08a 82static DEFINE_SPINLOCK(map_lock);
bc38a6ab
RD
83static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
84
85static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
057aec0d 86 struct ib_device *ib_dev,
bc38a6ab
RD
87 const char __user *buf, int in_len,
88 int out_len) = {
9afed76d
AC
89 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
90 [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
91 [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
92 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
93 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
94 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
7e6edb9b 95 [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
9afed76d 96 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
6b52a12b
SM
97 [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
98 [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
6b73597e 99 [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
9afed76d
AC
100 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
101 [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
102 [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
103 [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
104 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
105 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
106 [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
107 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
108 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
109 [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
110 [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
111 [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
112 [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
113 [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
114 [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
115 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
116 [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
117 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
118 [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
119 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
53d0bd1e
SH
120 [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
121 [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
42849b26 122 [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
436f2ad0 123 [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
f21519b2
YD
124};
125
f21519b2 126static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
057aec0d 127 struct ib_device *ib_dev,
f21519b2
YD
128 struct ib_udata *ucore,
129 struct ib_udata *uhw) = {
130 [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
5a77abf9 131 [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
02d1aa7a 132 [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
565197dd 133 [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
6d8a7497 134 [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
f213c052
YH
135 [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq,
136 [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq,
137 [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
de019a94
YH
138 [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
139 [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
bc38a6ab
RD
140};
141
/* Forward declarations; the definitions are not in this part of the file. */
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
bc38a6ab 144
feb7c1e3
CH
145int uverbs_dealloc_mw(struct ib_mw *mw)
146{
147 struct ib_pd *pd = mw->pd;
148 int ret;
149
150 ret = mw->device->dealloc_mw(mw);
151 if (!ret)
152 atomic_dec(&pd->usecnt);
153 return ret;
154}
155
35d4a0b6 156static void ib_uverbs_release_dev(struct kobject *kobj)
70a30e16
RD
157{
158 struct ib_uverbs_device *dev =
35d4a0b6 159 container_of(kobj, struct ib_uverbs_device, kobj);
70a30e16 160
036b1063 161 cleanup_srcu_struct(&dev->disassociate_srcu);
35d4a0b6 162 kfree(dev);
70a30e16
RD
163}
164
35d4a0b6
YH
165static struct kobj_type ib_uverbs_dev_ktype = {
166 .release = ib_uverbs_release_dev,
167};
168
04d29b0e
RD
169static void ib_uverbs_release_event_file(struct kref *ref)
170{
171 struct ib_uverbs_event_file *file =
172 container_of(ref, struct ib_uverbs_event_file, ref);
173
174 kfree(file);
175}
176
70a30e16
RD
177void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
178 struct ib_uverbs_event_file *ev_file,
179 struct ib_ucq_object *uobj)
180{
181 struct ib_uverbs_event *evt, *tmp;
182
183 if (ev_file) {
184 spin_lock_irq(&ev_file->lock);
185 list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
186 list_del(&evt->list);
187 kfree(evt);
188 }
189 spin_unlock_irq(&ev_file->lock);
190
191 kref_put(&ev_file->ref, ib_uverbs_release_event_file);
192 }
193
194 spin_lock_irq(&file->async_file->lock);
195 list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
196 list_del(&evt->list);
197 kfree(evt);
198 }
199 spin_unlock_irq(&file->async_file->lock);
200}
201
202void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
203 struct ib_uevent_object *uobj)
204{
205 struct ib_uverbs_event *evt, *tmp;
206
207 spin_lock_irq(&file->async_file->lock);
208 list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
209 list_del(&evt->list);
210 kfree(evt);
211 }
212 spin_unlock_irq(&file->async_file->lock);
213}
214
f4e40156
JM
215static void ib_uverbs_detach_umcast(struct ib_qp *qp,
216 struct ib_uqp_object *uobj)
217{
218 struct ib_uverbs_mcast_entry *mcast, *tmp;
219
220 list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
221 ib_detach_mcast(qp, &mcast->gid, mcast->lid);
222 list_del(&mcast->list);
223 kfree(mcast);
224 }
225}
226
70a30e16
RD
227static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
228 struct ib_ucontext *context)
bc38a6ab
RD
229{
230 struct ib_uobject *uobj, *tmp;
231
f7c6a7b5
RD
232 context->closing = 1;
233
67cdb40c 234 list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
9ead190b
RD
235 struct ib_ah *ah = uobj->object;
236
237 idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
67cdb40c 238 ib_destroy_ah(ah);
67cdb40c
RD
239 kfree(uobj);
240 }
bc38a6ab 241
6b52a12b
SM
242 /* Remove MWs before QPs, in order to support type 2A MWs. */
243 list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
244 struct ib_mw *mw = uobj->object;
245
246 idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
feb7c1e3 247 uverbs_dealloc_mw(mw);
6b52a12b
SM
248 kfree(uobj);
249 }
250
436f2ad0
HHZ
251 list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
252 struct ib_flow *flow_id = uobj->object;
253
254 idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
255 ib_destroy_flow(flow_id);
256 kfree(uobj);
257 }
258
bc38a6ab 259 list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
9ead190b 260 struct ib_qp *qp = uobj->object;
f4e40156
JM
261 struct ib_uqp_object *uqp =
262 container_of(uobj, struct ib_uqp_object, uevent.uobject);
9ead190b
RD
263
264 idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
0e0ec7e0
SH
265 if (qp != qp->real_qp) {
266 ib_close_qp(qp);
b93f3c18
SH
267 } else {
268 ib_uverbs_detach_umcast(qp, uqp);
269 ib_destroy_qp(qp);
270 }
f4e40156
JM
271 ib_uverbs_release_uevent(file, &uqp->uevent);
272 kfree(uqp);
bc38a6ab
RD
273 }
274
de019a94
YH
275 list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
276 struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
277 struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
278
279 idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
280 ib_destroy_rwq_ind_table(rwq_ind_tbl);
281 kfree(ind_tbl);
282 kfree(uobj);
283 }
284
f213c052
YH
285 list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
286 struct ib_wq *wq = uobj->object;
287 struct ib_uwq_object *uwq =
288 container_of(uobj, struct ib_uwq_object, uevent.uobject);
289
290 idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
291 ib_destroy_wq(wq);
292 ib_uverbs_release_uevent(file, &uwq->uevent);
293 kfree(uwq);
294 }
295
a233c4b5
SD
296 list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
297 struct ib_srq *srq = uobj->object;
298 struct ib_uevent_object *uevent =
299 container_of(uobj, struct ib_uevent_object, uobject);
300
301 idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
302 ib_destroy_srq(srq);
303 ib_uverbs_release_uevent(file, uevent);
304 kfree(uevent);
305 }
306
bc38a6ab 307 list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
9ead190b 308 struct ib_cq *cq = uobj->object;
70a30e16
RD
309 struct ib_uverbs_event_file *ev_file = cq->cq_context;
310 struct ib_ucq_object *ucq =
311 container_of(uobj, struct ib_ucq_object, uobject);
9ead190b
RD
312
313 idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
bc38a6ab 314 ib_destroy_cq(cq);
70a30e16
RD
315 ib_uverbs_release_ucq(file, ev_file, ucq);
316 kfree(ucq);
bc38a6ab
RD
317 }
318
bc38a6ab 319 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
9ead190b 320 struct ib_mr *mr = uobj->object;
bc38a6ab 321
9ead190b 322 idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
bc38a6ab 323 ib_dereg_mr(mr);
f7c6a7b5 324 kfree(uobj);
bc38a6ab
RD
325 }
326
53d0bd1e
SH
327 mutex_lock(&file->device->xrcd_tree_mutex);
328 list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
329 struct ib_xrcd *xrcd = uobj->object;
330 struct ib_uxrcd_object *uxrcd =
331 container_of(uobj, struct ib_uxrcd_object, uobject);
332
333 idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
334 ib_uverbs_dealloc_xrcd(file->device, xrcd);
335 kfree(uxrcd);
336 }
337 mutex_unlock(&file->device->xrcd_tree_mutex);
338
bc38a6ab 339 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
9ead190b
RD
340 struct ib_pd *pd = uobj->object;
341
342 idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
bc38a6ab 343 ib_dealloc_pd(pd);
bc38a6ab
RD
344 kfree(uobj);
345 }
346
8ada2c1c
SR
347 put_pid(context->tgid);
348
bc38a6ab
RD
349 return context->device->dealloc_ucontext(context);
350}
351
35d4a0b6
YH
352static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
353{
354 complete(&dev->comp);
355}
356
bc38a6ab
RD
357static void ib_uverbs_release_file(struct kref *ref)
358{
359 struct ib_uverbs_file *file =
360 container_of(ref, struct ib_uverbs_file, ref);
036b1063
YH
361 struct ib_device *ib_dev;
362 int srcu_key;
363
364 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
365 ib_dev = srcu_dereference(file->device->ib_dev,
366 &file->device->disassociate_srcu);
367 if (ib_dev && !ib_dev->disassociate_ucontext)
368 module_put(ib_dev->owner);
369 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
bc38a6ab 370
35d4a0b6
YH
371 if (atomic_dec_and_test(&file->device->refcount))
372 ib_uverbs_comp_dev(file->device);
70a30e16 373
bc38a6ab
RD
374 kfree(file);
375}
376
377static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
378 size_t count, loff_t *pos)
379{
380 struct ib_uverbs_event_file *file = filp->private_data;
63aaf647 381 struct ib_uverbs_event *event;
bc38a6ab
RD
382 int eventsz;
383 int ret = 0;
384
385 spin_lock_irq(&file->lock);
386
6b73597e 387 while (list_empty(&file->event_list)) {
bc38a6ab
RD
388 spin_unlock_irq(&file->lock);
389
390 if (filp->f_flags & O_NONBLOCK)
391 return -EAGAIN;
392
393 if (wait_event_interruptible(file->poll_wait,
036b1063
YH
394 (!list_empty(&file->event_list) ||
395 /* The barriers built into wait_event_interruptible()
396 * and wake_up() guarentee this will see the null set
397 * without using RCU
398 */
399 !file->uverbs_file->device->ib_dev)))
bc38a6ab
RD
400 return -ERESTARTSYS;
401
036b1063
YH
402 /* If device was disassociated and no event exists set an error */
403 if (list_empty(&file->event_list) &&
404 !file->uverbs_file->device->ib_dev)
405 return -EIO;
406
bc38a6ab
RD
407 spin_lock_irq(&file->lock);
408 }
409
63aaf647
RD
410 event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
411
412 if (file->is_async)
bc38a6ab 413 eventsz = sizeof (struct ib_uverbs_async_event_desc);
63aaf647 414 else
bc38a6ab 415 eventsz = sizeof (struct ib_uverbs_comp_event_desc);
bc38a6ab
RD
416
417 if (eventsz > count) {
418 ret = -EINVAL;
419 event = NULL;
63aaf647 420 } else {
bc38a6ab 421 list_del(file->event_list.next);
63aaf647
RD
422 if (event->counter) {
423 ++(*event->counter);
424 list_del(&event->obj_list);
425 }
426 }
bc38a6ab
RD
427
428 spin_unlock_irq(&file->lock);
429
430 if (event) {
431 if (copy_to_user(buf, event, eventsz))
432 ret = -EFAULT;
433 else
434 ret = eventsz;
435 }
436
437 kfree(event);
438
439 return ret;
440}
441
442static unsigned int ib_uverbs_event_poll(struct file *filp,
443 struct poll_table_struct *wait)
444{
445 unsigned int pollflags = 0;
446 struct ib_uverbs_event_file *file = filp->private_data;
447
448 poll_wait(filp, &file->poll_wait, wait);
449
450 spin_lock_irq(&file->lock);
6b73597e 451 if (!list_empty(&file->event_list))
bc38a6ab
RD
452 pollflags = POLLIN | POLLRDNORM;
453 spin_unlock_irq(&file->lock);
454
455 return pollflags;
456}
457
abdf119b
GN
458static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
459{
460 struct ib_uverbs_event_file *file = filp->private_data;
461
462 return fasync_helper(fd, filp, on, &file->async_queue);
463}
464
bc38a6ab
RD
465static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
466{
467 struct ib_uverbs_event_file *file = filp->private_data;
6b73597e 468 struct ib_uverbs_event *entry, *tmp;
036b1063 469 int closed_already = 0;
6b73597e 470
036b1063 471 mutex_lock(&file->uverbs_file->device->lists_mutex);
6b73597e 472 spin_lock_irq(&file->lock);
036b1063 473 closed_already = file->is_closed;
1ae5c187 474 file->is_closed = 1;
6b73597e
RD
475 list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
476 if (entry->counter)
477 list_del(&entry->obj_list);
478 kfree(entry);
479 }
480 spin_unlock_irq(&file->lock);
036b1063
YH
481 if (!closed_already) {
482 list_del(&file->list);
483 if (file->is_async)
484 ib_unregister_event_handler(&file->uverbs_file->
485 event_handler);
486 }
487 mutex_unlock(&file->uverbs_file->device->lists_mutex);
bc38a6ab 488
03c40442 489 kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
6b73597e 490 kref_put(&file->ref, ib_uverbs_release_event_file);
bc38a6ab
RD
491
492 return 0;
493}
494
2b8693c0 495static const struct file_operations uverbs_event_fops = {
6b73597e 496 .owner = THIS_MODULE,
9afed76d 497 .read = ib_uverbs_event_read,
bc38a6ab 498 .poll = ib_uverbs_event_poll,
abdf119b 499 .release = ib_uverbs_event_close,
bc1db9af
RD
500 .fasync = ib_uverbs_event_fasync,
501 .llseek = no_llseek,
bc38a6ab
RD
502};
503
504void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
505{
6b73597e
RD
506 struct ib_uverbs_event_file *file = cq_context;
507 struct ib_ucq_object *uobj;
508 struct ib_uverbs_event *entry;
509 unsigned long flags;
510
511 if (!file)
512 return;
513
514 spin_lock_irqsave(&file->lock, flags);
1ae5c187 515 if (file->is_closed) {
6b73597e
RD
516 spin_unlock_irqrestore(&file->lock, flags);
517 return;
518 }
bc38a6ab
RD
519
520 entry = kmalloc(sizeof *entry, GFP_ATOMIC);
305a7e87
RD
521 if (!entry) {
522 spin_unlock_irqrestore(&file->lock, flags);
bc38a6ab 523 return;
305a7e87 524 }
bc38a6ab 525
63aaf647
RD
526 uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
527
528 entry->desc.comp.cq_handle = cq->uobject->user_handle;
529 entry->counter = &uobj->comp_events_reported;
bc38a6ab 530
6b73597e 531 list_add_tail(&entry->list, &file->event_list);
63aaf647 532 list_add_tail(&entry->obj_list, &uobj->comp_list);
6b73597e 533 spin_unlock_irqrestore(&file->lock, flags);
bc38a6ab 534
6b73597e
RD
535 wake_up_interruptible(&file->poll_wait);
536 kill_fasync(&file->async_queue, SIGIO, POLL_IN);
bc38a6ab
RD
537}
538
539static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
63aaf647
RD
540 __u64 element, __u64 event,
541 struct list_head *obj_list,
542 u32 *counter)
bc38a6ab 543{
63aaf647 544 struct ib_uverbs_event *entry;
bc38a6ab
RD
545 unsigned long flags;
546
6b73597e 547 spin_lock_irqsave(&file->async_file->lock, flags);
fb77bcef 548 if (file->async_file->is_closed) {
6b73597e
RD
549 spin_unlock_irqrestore(&file->async_file->lock, flags);
550 return;
551 }
552
bc38a6ab 553 entry = kmalloc(sizeof *entry, GFP_ATOMIC);
305a7e87
RD
554 if (!entry) {
555 spin_unlock_irqrestore(&file->async_file->lock, flags);
bc38a6ab 556 return;
305a7e87 557 }
bc38a6ab 558
63aaf647
RD
559 entry->desc.async.element = element;
560 entry->desc.async.event_type = event;
377b5134 561 entry->desc.async.reserved = 0;
63aaf647 562 entry->counter = counter;
bc38a6ab 563
6b73597e 564 list_add_tail(&entry->list, &file->async_file->event_list);
63aaf647
RD
565 if (obj_list)
566 list_add_tail(&entry->obj_list, obj_list);
6b73597e 567 spin_unlock_irqrestore(&file->async_file->lock, flags);
bc38a6ab 568
6b73597e
RD
569 wake_up_interruptible(&file->async_file->poll_wait);
570 kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
bc38a6ab
RD
571}
572
573void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
574{
7162a3e0
RD
575 struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
576 struct ib_ucq_object, uobject);
63aaf647 577
7162a3e0 578 ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
63aaf647
RD
579 event->event, &uobj->async_list,
580 &uobj->async_events_reported);
bc38a6ab
RD
581}
582
583void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
584{
63aaf647
RD
585 struct ib_uevent_object *uobj;
586
a040f95d
JM
587 /* for XRC target qp's, check that qp is live */
588 if (!event->element.qp->uobject || !event->element.qp->uobject->live)
589 return;
590
63aaf647
RD
591 uobj = container_of(event->element.qp->uobject,
592 struct ib_uevent_object, uobject);
593
594 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
595 event->event, &uobj->event_list,
596 &uobj->events_reported);
bc38a6ab
RD
597}
598
f213c052
YH
599void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
600{
601 struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
602 struct ib_uevent_object, uobject);
603
604 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
605 event->event, &uobj->event_list,
606 &uobj->events_reported);
607}
608
f520ba5a
RD
609void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
610{
63aaf647
RD
611 struct ib_uevent_object *uobj;
612
613 uobj = container_of(event->element.srq->uobject,
614 struct ib_uevent_object, uobject);
615
616 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
617 event->event, &uobj->event_list,
618 &uobj->events_reported);
f520ba5a
RD
619}
620
6b73597e
RD
621void ib_uverbs_event_handler(struct ib_event_handler *handler,
622 struct ib_event *event)
bc38a6ab
RD
623{
624 struct ib_uverbs_file *file =
625 container_of(handler, struct ib_uverbs_file, event_handler);
626
63aaf647
RD
627 ib_uverbs_async_handler(file, event->element.port_num, event->event,
628 NULL, NULL);
bc38a6ab
RD
629}
630
03c40442
YH
631void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
632{
633 kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
634 file->async_file = NULL;
635}
636
6b73597e 637struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
057aec0d 638 struct ib_device *ib_dev,
b1e4594b 639 int is_async)
bc38a6ab 640{
6b73597e 641 struct ib_uverbs_event_file *ev_file;
bc38a6ab 642 struct file *filp;
03c40442 643 int ret;
bc38a6ab 644
03c40442 645 ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
6b73597e
RD
646 if (!ev_file)
647 return ERR_PTR(-ENOMEM);
648
649 kref_init(&ev_file->ref);
650 spin_lock_init(&ev_file->lock);
651 INIT_LIST_HEAD(&ev_file->event_list);
652 init_waitqueue_head(&ev_file->poll_wait);
653 ev_file->uverbs_file = uverbs_file;
03c40442 654 kref_get(&ev_file->uverbs_file->ref);
6b73597e 655 ev_file->async_queue = NULL;
1ae5c187 656 ev_file->is_closed = 0;
6b73597e 657
b1e4594b 658 filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
a265e558 659 ev_file, O_RDONLY);
b1e4594b 660 if (IS_ERR(filp))
03c40442
YH
661 goto err_put_refs;
662
036b1063
YH
663 mutex_lock(&uverbs_file->device->lists_mutex);
664 list_add_tail(&ev_file->list,
665 &uverbs_file->device->uverbs_events_file_list);
666 mutex_unlock(&uverbs_file->device->lists_mutex);
667
03c40442
YH
668 if (is_async) {
669 WARN_ON(uverbs_file->async_file);
670 uverbs_file->async_file = ev_file;
671 kref_get(&uverbs_file->async_file->ref);
672 INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
057aec0d 673 ib_dev,
03c40442
YH
674 ib_uverbs_event_handler);
675 ret = ib_register_event_handler(&uverbs_file->event_handler);
676 if (ret)
677 goto err_put_file;
678
679 /* At that point async file stuff was fully set */
680 ev_file->is_async = 1;
681 }
682
683 return filp;
684
685err_put_file:
686 fput(filp);
687 kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
688 uverbs_file->async_file = NULL;
689 return ERR_PTR(ret);
a7dab9e8 690
03c40442
YH
691err_put_refs:
692 kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
693 kref_put(&ev_file->ref, ib_uverbs_release_event_file);
6b73597e 694 return filp;
6b73597e
RD
695}
696
697/*
698 * Look up a completion event file by FD. If lookup is successful,
699 * takes a ref to the event file struct that it returns; if
700 * unsuccessful, returns NULL.
701 */
702struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
703{
704 struct ib_uverbs_event_file *ev_file = NULL;
2903ff01 705 struct fd f = fdget(fd);
6b73597e 706
2903ff01 707 if (!f.file)
6b73597e
RD
708 return NULL;
709
2903ff01 710 if (f.file->f_op != &uverbs_event_fops)
6b73597e
RD
711 goto out;
712
2903ff01 713 ev_file = f.file->private_data;
6b73597e
RD
714 if (ev_file->is_async) {
715 ev_file = NULL;
716 goto out;
717 }
718
719 kref_get(&ev_file->ref);
720
721out:
2903ff01 722 fdput(f);
6b73597e 723 return ev_file;
bc38a6ab
RD
724}
725
2dbd5186
EC
726static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
727{
728 u64 mask;
729
730 if (command <= IB_USER_VERBS_CMD_OPEN_QP)
731 mask = ib_dev->uverbs_cmd_mask;
732 else
733 mask = ib_dev->uverbs_ex_cmd_mask;
734
735 if (mask & ((u64)1 << command))
736 return 0;
737
738 return -1;
739}
740
bc38a6ab
RD
741static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
742 size_t count, loff_t *pos)
743{
744 struct ib_uverbs_file *file = filp->private_data;
036b1063 745 struct ib_device *ib_dev;
bc38a6ab 746 struct ib_uverbs_cmd_hdr hdr;
74a0b0a5 747 __u32 command;
f21519b2 748 __u32 flags;
036b1063
YH
749 int srcu_key;
750 ssize_t ret;
057aec0d 751
e6bd18f5
JG
752 if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
753 return -EACCES;
754
bc38a6ab
RD
755 if (count < sizeof hdr)
756 return -EINVAL;
757
758 if (copy_from_user(&hdr, buf, sizeof hdr))
759 return -EFAULT;
760
036b1063
YH
761 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
762 ib_dev = srcu_dereference(file->device->ib_dev,
763 &file->device->disassociate_srcu);
764 if (!ib_dev) {
765 ret = -EIO;
766 goto out;
767 }
768
74a0b0a5
EC
769 if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
770 IB_USER_VERBS_CMD_COMMAND_MASK)) {
771 ret = -EINVAL;
772 goto out;
773 }
774
775 command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
2dbd5186
EC
776 if (verify_command_mask(ib_dev, command)) {
777 ret = -EOPNOTSUPP;
778 goto out;
779 }
74a0b0a5 780
eaebc7d2
EC
781 if (!file->ucontext &&
782 command != IB_USER_VERBS_CMD_GET_CONTEXT) {
783 ret = -EINVAL;
784 goto out;
785 }
786
f21519b2
YD
787 flags = (hdr.command &
788 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
bc38a6ab 789
f21519b2 790 if (!flags) {
f21519b2 791 if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
036b1063
YH
792 !uverbs_cmd_table[command]) {
793 ret = -EINVAL;
794 goto out;
795 }
400dbc96 796
036b1063
YH
797 if (hdr.in_words * 4 != count) {
798 ret = -EINVAL;
799 goto out;
800 }
400dbc96 801
036b1063 802 ret = uverbs_cmd_table[command](file, ib_dev,
f21519b2
YD
803 buf + sizeof(hdr),
804 hdr.in_words * 4,
805 hdr.out_words * 4);
806
f21519b2 807 } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
f21519b2
YD
808 struct ib_uverbs_ex_cmd_hdr ex_hdr;
809 struct ib_udata ucore;
810 struct ib_udata uhw;
f21519b2
YD
811 size_t written_count = count;
812
f21519b2 813 if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
036b1063
YH
814 !uverbs_ex_cmd_table[command]) {
815 ret = -ENOSYS;
816 goto out;
817 }
f21519b2 818
036b1063
YH
819 if (!file->ucontext) {
820 ret = -EINVAL;
821 goto out;
822 }
f21519b2 823
036b1063
YH
824 if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
825 ret = -EINVAL;
826 goto out;
827 }
f21519b2 828
036b1063
YH
829 if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
830 ret = -EFAULT;
831 goto out;
832 }
f21519b2
YD
833
834 count -= sizeof(hdr) + sizeof(ex_hdr);
835 buf += sizeof(hdr) + sizeof(ex_hdr);
836
036b1063
YH
837 if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
838 ret = -EINVAL;
839 goto out;
840 }
f21519b2 841
036b1063
YH
842 if (ex_hdr.cmd_hdr_reserved) {
843 ret = -EINVAL;
844 goto out;
845 }
7efb1b19 846
f21519b2 847 if (ex_hdr.response) {
036b1063
YH
848 if (!hdr.out_words && !ex_hdr.provider_out_words) {
849 ret = -EINVAL;
850 goto out;
851 }
6cc3df84
YD
852
853 if (!access_ok(VERIFY_WRITE,
854 (void __user *) (unsigned long) ex_hdr.response,
036b1063
YH
855 (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
856 ret = -EFAULT;
857 goto out;
858 }
f21519b2 859 } else {
036b1063
YH
860 if (hdr.out_words || ex_hdr.provider_out_words) {
861 ret = -EINVAL;
862 goto out;
863 }
f21519b2
YD
864 }
865
a96e4e2f
RD
866 INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
867 hdr.in_words * 8, hdr.out_words * 8);
868
869 INIT_UDATA_BUF_OR_NULL(&uhw,
870 buf + ucore.inlen,
871 (unsigned long) ex_hdr.response + ucore.outlen,
872 ex_hdr.provider_in_words * 8,
873 ex_hdr.provider_out_words * 8);
f21519b2 874
036b1063 875 ret = uverbs_ex_cmd_table[command](file,
057aec0d 876 ib_dev,
f21519b2
YD
877 &ucore,
878 &uhw);
036b1063
YH
879 if (!ret)
880 ret = written_count;
881 } else {
882 ret = -ENOSYS;
400dbc96 883 }
f21519b2 884
036b1063
YH
885out:
886 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
887 return ret;
bc38a6ab
RD
888}
889
890static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
891{
892 struct ib_uverbs_file *file = filp->private_data;
036b1063
YH
893 struct ib_device *ib_dev;
894 int ret = 0;
895 int srcu_key;
bc38a6ab 896
036b1063
YH
897 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
898 ib_dev = srcu_dereference(file->device->ib_dev,
899 &file->device->disassociate_srcu);
900 if (!ib_dev) {
901 ret = -EIO;
902 goto out;
903 }
904
905 if (!file->ucontext)
906 ret = -ENODEV;
bc38a6ab 907 else
036b1063
YH
908 ret = ib_dev->mmap(file->ucontext, vma);
909out:
910 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
911 return ret;
bc38a6ab
RD
912}
913
5b2d281a
RD
914/*
915 * ib_uverbs_open() does not need the BKL:
916 *
2a72f212 917 * - the ib_uverbs_device structures are properly reference counted and
5b2d281a
RD
918 * everything else is purely local to the file being created, so
919 * races against other open calls are not a problem;
920 * - there is no ioctl method to race against;
2a72f212
AC
921 * - the open method will either immediately run -ENXIO, or all
922 * required initialization will be done.
5b2d281a 923 */
bc38a6ab
RD
924static int ib_uverbs_open(struct inode *inode, struct file *filp)
925{
70a30e16 926 struct ib_uverbs_device *dev;
bc38a6ab 927 struct ib_uverbs_file *file;
036b1063 928 struct ib_device *ib_dev;
70a30e16 929 int ret;
036b1063
YH
930 int module_dependent;
931 int srcu_key;
bc38a6ab 932
2a72f212 933 dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
35d4a0b6 934 if (!atomic_inc_not_zero(&dev->refcount))
70a30e16
RD
935 return -ENXIO;
936
036b1063
YH
937 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
938 mutex_lock(&dev->lists_mutex);
939 ib_dev = srcu_dereference(dev->ib_dev,
940 &dev->disassociate_srcu);
941 if (!ib_dev) {
942 ret = -EIO;
70a30e16
RD
943 goto err;
944 }
bc38a6ab 945
036b1063
YH
946 /* In case IB device supports disassociate ucontext, there is no hard
947 * dependency between uverbs device and its low level device.
948 */
949 module_dependent = !(ib_dev->disassociate_ucontext);
950
951 if (module_dependent) {
952 if (!try_module_get(ib_dev->owner)) {
953 ret = -ENODEV;
954 goto err;
955 }
956 }
957
958 file = kzalloc(sizeof(*file), GFP_KERNEL);
63c47c28 959 if (!file) {
70a30e16 960 ret = -ENOMEM;
036b1063
YH
961 if (module_dependent)
962 goto err_module;
963
964 goto err;
63c47c28 965 }
bc38a6ab 966
70a30e16
RD
967 file->device = dev;
968 file->ucontext = NULL;
969 file->async_file = NULL;
bc38a6ab 970 kref_init(&file->ref);
95ed644f 971 mutex_init(&file->mutex);
bc38a6ab 972
bc38a6ab 973 filp->private_data = file;
35d4a0b6 974 kobject_get(&dev->kobj);
036b1063
YH
975 list_add_tail(&file->list, &dev->uverbs_file_list);
976 mutex_unlock(&dev->lists_mutex);
977 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
bc38a6ab 978
bc1db9af 979 return nonseekable_open(inode, filp);
70a30e16
RD
980
981err_module:
036b1063 982 module_put(ib_dev->owner);
70a30e16
RD
983
984err:
036b1063
YH
985 mutex_unlock(&dev->lists_mutex);
986 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
35d4a0b6
YH
987 if (atomic_dec_and_test(&dev->refcount))
988 ib_uverbs_comp_dev(dev);
989
70a30e16 990 return ret;
bc38a6ab
RD
991}
992
993static int ib_uverbs_close(struct inode *inode, struct file *filp)
994{
995 struct ib_uverbs_file *file = filp->private_data;
35d4a0b6 996 struct ib_uverbs_device *dev = file->device;
036b1063
YH
997 struct ib_ucontext *ucontext = NULL;
998
999 mutex_lock(&file->device->lists_mutex);
1000 ucontext = file->ucontext;
1001 file->ucontext = NULL;
1002 if (!file->is_closed) {
1003 list_del(&file->list);
1004 file->is_closed = 1;
1005 }
1006 mutex_unlock(&file->device->lists_mutex);
1007 if (ucontext)
1008 ib_uverbs_cleanup_ucontext(file, ucontext);
70a30e16
RD
1009
1010 if (file->async_file)
1011 kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
bc38a6ab 1012
bc38a6ab 1013 kref_put(&file->ref, ib_uverbs_release_file);
35d4a0b6 1014 kobject_put(&dev->kobj);
bc38a6ab
RD
1015
1016 return 0;
1017}
1018
2b8693c0 1019static const struct file_operations uverbs_fops = {
9afed76d
AC
1020 .owner = THIS_MODULE,
1021 .write = ib_uverbs_write,
1022 .open = ib_uverbs_open,
bc1db9af
RD
1023 .release = ib_uverbs_close,
1024 .llseek = no_llseek,
bc38a6ab
RD
1025};
1026
2b8693c0 1027static const struct file_operations uverbs_mmap_fops = {
9afed76d
AC
1028 .owner = THIS_MODULE,
1029 .write = ib_uverbs_write,
bc38a6ab 1030 .mmap = ib_uverbs_mmap,
9afed76d 1031 .open = ib_uverbs_open,
bc1db9af
RD
1032 .release = ib_uverbs_close,
1033 .llseek = no_llseek,
bc38a6ab
RD
1034};
1035
1036static struct ib_client uverbs_client = {
1037 .name = "uverbs",
1038 .add = ib_uverbs_add_one,
1039 .remove = ib_uverbs_remove_one
1040};
1041
f4e91eb4
TJ
1042static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
1043 char *buf)
bc38a6ab 1044{
036b1063
YH
1045 int ret = -ENODEV;
1046 int srcu_key;
f4e91eb4 1047 struct ib_uverbs_device *dev = dev_get_drvdata(device);
036b1063 1048 struct ib_device *ib_dev;
70a30e16
RD
1049
1050 if (!dev)
1051 return -ENODEV;
bc38a6ab 1052
036b1063
YH
1053 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
1054 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
1055 if (ib_dev)
1056 ret = sprintf(buf, "%s\n", ib_dev->name);
1057 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
1058
1059 return ret;
bc38a6ab 1060}
f4e91eb4 1061static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
bc38a6ab 1062
f4e91eb4
TJ
1063static ssize_t show_dev_abi_version(struct device *device,
1064 struct device_attribute *attr, char *buf)
274c0891 1065{
f4e91eb4 1066 struct ib_uverbs_device *dev = dev_get_drvdata(device);
036b1063
YH
1067 int ret = -ENODEV;
1068 int srcu_key;
1069 struct ib_device *ib_dev;
70a30e16
RD
1070
1071 if (!dev)
1072 return -ENODEV;
036b1063
YH
1073 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
1074 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
1075 if (ib_dev)
1076 ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
1077 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
274c0891 1078
036b1063 1079 return ret;
274c0891 1080}
f4e91eb4 1081static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
274c0891 1082
0933e2d9
AK
1083static CLASS_ATTR_STRING(abi_version, S_IRUGO,
1084 __stringify(IB_USER_VERBS_ABI_VERSION));
bc38a6ab 1085
6d6a0e71
AC
1086static dev_t overflow_maj;
1087static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
1088
1089/*
1090 * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
1091 * requesting a new major number and doubling the number of max devices we
1092 * support. It's stupid, but simple.
1093 */
1094static int find_overflow_devnum(void)
1095{
1096 int ret;
1097
1098 if (!overflow_maj) {
1099 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
1100 "infiniband_verbs");
1101 if (ret) {
aba25a3e 1102 pr_err("user_verbs: couldn't register dynamic device number\n");
6d6a0e71
AC
1103 return ret;
1104 }
1105 }
1106
1107 ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
1108 if (ret >= IB_UVERBS_MAX_DEVICES)
1109 return -1;
1110
1111 return ret;
1112}
1113
bc38a6ab
RD
1114static void ib_uverbs_add_one(struct ib_device *device)
1115{
38707980 1116 int devnum;
ddbd6883 1117 dev_t base;
bc38a6ab 1118 struct ib_uverbs_device *uverbs_dev;
036b1063 1119 int ret;
bc38a6ab
RD
1120
1121 if (!device->alloc_ucontext)
1122 return;
1123
de6eb66b 1124 uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
bc38a6ab
RD
1125 if (!uverbs_dev)
1126 return;
1127
036b1063
YH
1128 ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
1129 if (ret) {
1130 kfree(uverbs_dev);
1131 return;
1132 }
1133
35d4a0b6 1134 atomic_set(&uverbs_dev->refcount, 1);
fd60ae40 1135 init_completion(&uverbs_dev->comp);
53d0bd1e
SH
1136 uverbs_dev->xrcd_tree = RB_ROOT;
1137 mutex_init(&uverbs_dev->xrcd_tree_mutex);
35d4a0b6 1138 kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
036b1063
YH
1139 mutex_init(&uverbs_dev->lists_mutex);
1140 INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
1141 INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
70a30e16 1142
bc38a6ab 1143 spin_lock(&map_lock);
38707980
AC
1144 devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
1145 if (devnum >= IB_UVERBS_MAX_DEVICES) {
bc38a6ab 1146 spin_unlock(&map_lock);
6d6a0e71
AC
1147 devnum = find_overflow_devnum();
1148 if (devnum < 0)
1149 goto err;
1150
1151 spin_lock(&map_lock);
1152 uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
1153 base = devnum + overflow_maj;
1154 set_bit(devnum, overflow_map);
1155 } else {
1156 uverbs_dev->devnum = devnum;
1157 base = devnum + IB_UVERBS_BASE_DEV;
1158 set_bit(devnum, dev_map);
bc38a6ab 1159 }
bc38a6ab
RD
1160 spin_unlock(&map_lock);
1161
036b1063 1162 rcu_assign_pointer(uverbs_dev->ib_dev, device);
f4fd0b22 1163 uverbs_dev->num_comp_vectors = device->num_comp_vectors;
bc38a6ab 1164
055422dd
AC
1165 cdev_init(&uverbs_dev->cdev, NULL);
1166 uverbs_dev->cdev.owner = THIS_MODULE;
1167 uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
35d4a0b6 1168 uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
055422dd 1169 kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
ddbd6883 1170 if (cdev_add(&uverbs_dev->cdev, base, 1))
70a30e16 1171 goto err_cdev;
bc38a6ab 1172
91bd418f 1173 uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
055422dd 1174 uverbs_dev->cdev.dev, uverbs_dev,
91bd418f 1175 "uverbs%d", uverbs_dev->devnum);
f4e91eb4 1176 if (IS_ERR(uverbs_dev->dev))
bc38a6ab
RD
1177 goto err_cdev;
1178
f4e91eb4 1179 if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
bc38a6ab 1180 goto err_class;
f4e91eb4 1181 if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
274c0891 1182 goto err_class;
bc38a6ab
RD
1183
1184 ib_set_client_data(device, &uverbs_client, uverbs_dev);
1185
1186 return;
1187
1188err_class:
055422dd 1189 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
bc38a6ab
RD
1190
1191err_cdev:
055422dd 1192 cdev_del(&uverbs_dev->cdev);
6d6a0e71
AC
1193 if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
1194 clear_bit(devnum, dev_map);
1195 else
1196 clear_bit(devnum, overflow_map);
bc38a6ab
RD
1197
1198err:
35d4a0b6
YH
1199 if (atomic_dec_and_test(&uverbs_dev->refcount))
1200 ib_uverbs_comp_dev(uverbs_dev);
fd60ae40 1201 wait_for_completion(&uverbs_dev->comp);
35d4a0b6 1202 kobject_put(&uverbs_dev->kobj);
bc38a6ab
RD
1203 return;
1204}
1205
036b1063
YH
1206static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
1207 struct ib_device *ib_dev)
1208{
1209 struct ib_uverbs_file *file;
1210 struct ib_uverbs_event_file *event_file;
1211 struct ib_event event;
1212
1213 /* Pending running commands to terminate */
1214 synchronize_srcu(&uverbs_dev->disassociate_srcu);
1215 event.event = IB_EVENT_DEVICE_FATAL;
1216 event.element.port_num = 0;
1217 event.device = ib_dev;
1218
1219 mutex_lock(&uverbs_dev->lists_mutex);
1220 while (!list_empty(&uverbs_dev->uverbs_file_list)) {
1221 struct ib_ucontext *ucontext;
1222
1223 file = list_first_entry(&uverbs_dev->uverbs_file_list,
1224 struct ib_uverbs_file, list);
1225 file->is_closed = 1;
1226 ucontext = file->ucontext;
1227 list_del(&file->list);
1228 file->ucontext = NULL;
1229 kref_get(&file->ref);
1230 mutex_unlock(&uverbs_dev->lists_mutex);
1231 /* We must release the mutex before going ahead and calling
1232 * disassociate_ucontext. disassociate_ucontext might end up
1233 * indirectly calling uverbs_close, for example due to freeing
1234 * the resources (e.g mmput).
1235 */
1236 ib_uverbs_event_handler(&file->event_handler, &event);
1237 if (ucontext) {
1238 ib_dev->disassociate_ucontext(ucontext);
1239 ib_uverbs_cleanup_ucontext(file, ucontext);
1240 }
1241
1242 mutex_lock(&uverbs_dev->lists_mutex);
1243 kref_put(&file->ref, ib_uverbs_release_file);
1244 }
1245
1246 while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
1247 event_file = list_first_entry(&uverbs_dev->
1248 uverbs_events_file_list,
1249 struct ib_uverbs_event_file,
1250 list);
1251 spin_lock_irq(&event_file->lock);
1252 event_file->is_closed = 1;
1253 spin_unlock_irq(&event_file->lock);
1254
1255 list_del(&event_file->list);
1256 if (event_file->is_async) {
1257 ib_unregister_event_handler(&event_file->uverbs_file->
1258 event_handler);
1259 event_file->uverbs_file->event_handler.device = NULL;
1260 }
1261
1262 wake_up_interruptible(&event_file->poll_wait);
1263 kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
1264 }
1265 mutex_unlock(&uverbs_dev->lists_mutex);
1266}
1267
7c1eb45a 1268static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
bc38a6ab 1269{
7c1eb45a 1270 struct ib_uverbs_device *uverbs_dev = client_data;
036b1063 1271 int wait_clients = 1;
bc38a6ab
RD
1272
1273 if (!uverbs_dev)
1274 return;
1275
f4e91eb4 1276 dev_set_drvdata(uverbs_dev->dev, NULL);
055422dd
AC
1277 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
1278 cdev_del(&uverbs_dev->cdev);
70a30e16 1279
6d6a0e71
AC
1280 if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
1281 clear_bit(uverbs_dev->devnum, dev_map);
1282 else
1283 clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
fd60ae40 1284
036b1063
YH
1285 if (device->disassociate_ucontext) {
1286 /* We disassociate HW resources and immediately return.
1287 * Userspace will see a EIO errno for all future access.
1288 * Upon returning, ib_device may be freed internally and is not
1289 * valid any more.
1290 * uverbs_device is still available until all clients close
1291 * their files, then the uverbs device ref count will be zero
1292 * and its resources will be freed.
1293 * Note: At this point no more files can be opened since the
1294 * cdev was deleted, however active clients can still issue
1295 * commands and close their open files.
1296 */
1297 rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
1298 ib_uverbs_free_hw_resources(uverbs_dev, device);
1299 wait_clients = 0;
1300 }
1301
35d4a0b6
YH
1302 if (atomic_dec_and_test(&uverbs_dev->refcount))
1303 ib_uverbs_comp_dev(uverbs_dev);
036b1063
YH
1304 if (wait_clients)
1305 wait_for_completion(&uverbs_dev->comp);
35d4a0b6 1306 kobject_put(&uverbs_dev->kobj);
bc38a6ab
RD
1307}
1308
2c9ede55 1309static char *uverbs_devnode(struct device *dev, umode_t *mode)
71c29bd5 1310{
b2bc4782
GR
1311 if (mode)
1312 *mode = 0666;
71c29bd5
RD
1313 return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
1314}
1315
bc38a6ab
RD
1316static int __init ib_uverbs_init(void)
1317{
1318 int ret;
1319
bc38a6ab
RD
1320 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
1321 "infiniband_verbs");
1322 if (ret) {
aba25a3e 1323 pr_err("user_verbs: couldn't register device number\n");
bc38a6ab
RD
1324 goto out;
1325 }
1326
70a30e16
RD
1327 uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
1328 if (IS_ERR(uverbs_class)) {
1329 ret = PTR_ERR(uverbs_class);
aba25a3e 1330 pr_err("user_verbs: couldn't create class infiniband_verbs\n");
bc38a6ab
RD
1331 goto out_chrdev;
1332 }
1333
71c29bd5
RD
1334 uverbs_class->devnode = uverbs_devnode;
1335
0933e2d9 1336 ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
bc38a6ab 1337 if (ret) {
aba25a3e 1338 pr_err("user_verbs: couldn't create abi_version attribute\n");
bc38a6ab
RD
1339 goto out_class;
1340 }
1341
bc38a6ab
RD
1342 ret = ib_register_client(&uverbs_client);
1343 if (ret) {
aba25a3e 1344 pr_err("user_verbs: couldn't register client\n");
a265e558 1345 goto out_class;
bc38a6ab
RD
1346 }
1347
1348 return 0;
1349
bc38a6ab 1350out_class:
70a30e16 1351 class_destroy(uverbs_class);
bc38a6ab
RD
1352
1353out_chrdev:
1354 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
1355
1356out:
1357 return ret;
1358}
1359
1360static void __exit ib_uverbs_cleanup(void)
1361{
1362 ib_unregister_client(&uverbs_client);
70a30e16 1363 class_destroy(uverbs_class);
bc38a6ab 1364 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
6d6a0e71
AC
1365 if (overflow_maj)
1366 unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
5d7edb3c
RD
1367 idr_destroy(&ib_uverbs_pd_idr);
1368 idr_destroy(&ib_uverbs_mr_idr);
1369 idr_destroy(&ib_uverbs_mw_idr);
1370 idr_destroy(&ib_uverbs_ah_idr);
1371 idr_destroy(&ib_uverbs_cq_idr);
1372 idr_destroy(&ib_uverbs_qp_idr);
1373 idr_destroy(&ib_uverbs_srq_idr);
bc38a6ab
RD
1374}
1375
/* Module entry and exit points. */
module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);