drivers/infiniband/core/uverbs_cmd.c
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005, 2006, 2007 Cisco Systems.  All rights reserved.
4  * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
5  * Copyright (c) 2006 Mellanox Technologies.  All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35
36 #include <linux/file.h>
37 #include <linux/fs.h>
38 #include <linux/slab.h>
39 #include <linux/sched.h>
40
41 #include <linux/uaccess.h>
42
43 #include "uverbs.h"
44 #include "core_priv.h"
45
46 struct uverbs_lock_class {
47         struct lock_class_key   key;
48         char                    name[16];
49 };
50
51 static struct uverbs_lock_class pd_lock_class   = { .name = "PD-uobj" };
52 static struct uverbs_lock_class mr_lock_class   = { .name = "MR-uobj" };
53 static struct uverbs_lock_class mw_lock_class   = { .name = "MW-uobj" };
54 static struct uverbs_lock_class cq_lock_class   = { .name = "CQ-uobj" };
55 static struct uverbs_lock_class qp_lock_class   = { .name = "QP-uobj" };
56 static struct uverbs_lock_class ah_lock_class   = { .name = "AH-uobj" };
57 static struct uverbs_lock_class srq_lock_class  = { .name = "SRQ-uobj" };
58 static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
59 static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
60
61 /*
62  * The ib_uobject locking scheme is as follows:
63  *
64  * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
65  *   needs to be held during all idr operations.  When an object is
66  *   looked up, a reference must be taken on the object's kref before
67  *   dropping this lock.
68  *
69  * - Each object also has an rwsem.  This rwsem must be held for
70  *   reading while an operation that uses the object is performed.
71  *   For example, while registering an MR, the associated PD's
72  *   uobject.mutex must be held for reading.  The rwsem must be held
73  *   for writing while initializing or destroying an object.
74  *
75  * - In addition, each object has a "live" flag.  If this flag is not
76  *   set, then lookups of the object will fail even if it is found in
77  *   the idr.  This handles a reader that blocks and does not acquire
78  *   the rwsem until after the object is destroyed.  The destroy
79  *   operation will set the live flag to 0 and then drop the rwsem;
80  *   this will allow the reader to acquire the rwsem, see that the
81  *   live flag is 0, and then drop the rwsem and its reference to
82  *   the object.  The underlying storage will not be freed until the last
83  *   reference to the object is dropped.
84  */
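
/*
 * A minimal sketch of the reader side of the scheme above, assuming the
 * caller already holds a kref on @uobj (taken under ib_uverbs_idr_lock, as
 * __idr_get_uobj() below does).  It is illustrative only and not part of the
 * driver itself; idr_read_uobj() below is the real implementation.  The
 * caller remains responsible for dropping its kref when this returns false.
 */
static inline bool __maybe_unused uverbs_uobj_try_read(struct ib_uobject *uobj)
{
        /* Block here if a destroy is in flight; the destroyer holds the rwsem for write. */
        down_read(&uobj->mutex);

        if (uobj->live)
                return true;    /* object is usable; caller finishes with up_read() */

        /* Destroy won the race: the idr entry may still exist, but the object is dead. */
        up_read(&uobj->mutex);
        return false;
}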
85
86 static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
87                       struct ib_ucontext *context, struct uverbs_lock_class *c)
88 {
89         uobj->user_handle = user_handle;
90         uobj->context     = context;
91         kref_init(&uobj->ref);
92         init_rwsem(&uobj->mutex);
93         lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
94         uobj->live        = 0;
95 }
96
97 static void release_uobj(struct kref *kref)
98 {
99         kfree(container_of(kref, struct ib_uobject, ref));
100 }
101
102 static void put_uobj(struct ib_uobject *uobj)
103 {
104         kref_put(&uobj->ref, release_uobj);
105 }
106
107 static void put_uobj_read(struct ib_uobject *uobj)
108 {
109         up_read(&uobj->mutex);
110         put_uobj(uobj);
111 }
112
113 static void put_uobj_write(struct ib_uobject *uobj)
114 {
115         up_write(&uobj->mutex);
116         put_uobj(uobj);
117 }
118
119 static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
120 {
121         int ret;
122
123         idr_preload(GFP_KERNEL);
124         spin_lock(&ib_uverbs_idr_lock);
125
126         ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
127         if (ret >= 0)
128                 uobj->id = ret;
129
130         spin_unlock(&ib_uverbs_idr_lock);
131         idr_preload_end();
132
133         return ret < 0 ? ret : 0;
134 }
135
136 void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
137 {
138         spin_lock(&ib_uverbs_idr_lock);
139         idr_remove(idr, uobj->id);
140         spin_unlock(&ib_uverbs_idr_lock);
141 }
142
143 static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
144                                          struct ib_ucontext *context)
145 {
146         struct ib_uobject *uobj;
147
148         spin_lock(&ib_uverbs_idr_lock);
149         uobj = idr_find(idr, id);
150         if (uobj) {
151                 if (uobj->context == context)
152                         kref_get(&uobj->ref);
153                 else
154                         uobj = NULL;
155         }
156         spin_unlock(&ib_uverbs_idr_lock);
157
158         return uobj;
159 }
160
161 static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
162                                         struct ib_ucontext *context, int nested)
163 {
164         struct ib_uobject *uobj;
165
166         uobj = __idr_get_uobj(idr, id, context);
167         if (!uobj)
168                 return NULL;
169
170         if (nested)
171                 down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
172         else
173                 down_read(&uobj->mutex);
174         if (!uobj->live) {
175                 put_uobj_read(uobj);
176                 return NULL;
177         }
178
179         return uobj;
180 }
181
182 static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
183                                          struct ib_ucontext *context)
184 {
185         struct ib_uobject *uobj;
186
187         uobj = __idr_get_uobj(idr, id, context);
188         if (!uobj)
189                 return NULL;
190
191         down_write(&uobj->mutex);
192         if (!uobj->live) {
193                 put_uobj_write(uobj);
194                 return NULL;
195         }
196
197         return uobj;
198 }
199
200 static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
201                           int nested)
202 {
203         struct ib_uobject *uobj;
204
205         uobj = idr_read_uobj(idr, id, context, nested);
206         return uobj ? uobj->object : NULL;
207 }
208
209 static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
210 {
211         return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
212 }
213
214 static void put_pd_read(struct ib_pd *pd)
215 {
216         put_uobj_read(pd->uobject);
217 }
218
219 static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
220 {
221         return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
222 }
223
224 static void put_cq_read(struct ib_cq *cq)
225 {
226         put_uobj_read(cq->uobject);
227 }
228
229 static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
230 {
231         return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
232 }
233
234 static void put_ah_read(struct ib_ah *ah)
235 {
236         put_uobj_read(ah->uobject);
237 }
238
239 static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
240 {
241         return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
242 }
243
244 static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
245 {
246         struct ib_uobject *uobj;
247
248         uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
249         return uobj ? uobj->object : NULL;
250 }
251
252 static void put_qp_read(struct ib_qp *qp)
253 {
254         put_uobj_read(qp->uobject);
255 }
256
257 static void put_qp_write(struct ib_qp *qp)
258 {
259         put_uobj_write(qp->uobject);
260 }
261
262 static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
263 {
264         return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
265 }
266
267 static void put_srq_read(struct ib_srq *srq)
268 {
269         put_uobj_read(srq->uobject);
270 }
271
272 static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
273                                      struct ib_uobject **uobj)
274 {
275         *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
276         return *uobj ? (*uobj)->object : NULL;
277 }
278
279 static void put_xrcd_read(struct ib_uobject *uobj)
280 {
281         put_uobj_read(uobj);
282 }
283
284 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
285                               struct ib_device *ib_dev,
286                               const char __user *buf,
287                               int in_len, int out_len)
288 {
289         struct ib_uverbs_get_context      cmd;
290         struct ib_uverbs_get_context_resp resp;
291         struct ib_udata                   udata;
292 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
293         struct ib_device_attr             dev_attr;
294 #endif
295         struct ib_ucontext               *ucontext;
296         struct file                      *filp;
297         int ret;
298
299         if (out_len < sizeof resp)
300                 return -ENOSPC;
301
302         if (copy_from_user(&cmd, buf, sizeof cmd))
303                 return -EFAULT;
304
305         mutex_lock(&file->mutex);
306
307         if (file->ucontext) {
308                 ret = -EINVAL;
309                 goto err;
310         }
311
312         INIT_UDATA(&udata, buf + sizeof cmd,
313                    (unsigned long) cmd.response + sizeof resp,
314                    in_len - sizeof cmd, out_len - sizeof resp);
315
316         ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
317         if (IS_ERR(ucontext)) {
318                 ret = PTR_ERR(ucontext);
319                 goto err;
320         }
321
322         ucontext->device = ib_dev;
323         INIT_LIST_HEAD(&ucontext->pd_list);
324         INIT_LIST_HEAD(&ucontext->mr_list);
325         INIT_LIST_HEAD(&ucontext->mw_list);
326         INIT_LIST_HEAD(&ucontext->cq_list);
327         INIT_LIST_HEAD(&ucontext->qp_list);
328         INIT_LIST_HEAD(&ucontext->srq_list);
329         INIT_LIST_HEAD(&ucontext->ah_list);
330         INIT_LIST_HEAD(&ucontext->xrcd_list);
331         INIT_LIST_HEAD(&ucontext->rule_list);
332         rcu_read_lock();
333         ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
334         rcu_read_unlock();
335         ucontext->closing = 0;
336
337 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
338         ucontext->umem_tree = RB_ROOT;
339         init_rwsem(&ucontext->umem_rwsem);
340         ucontext->odp_mrs_count = 0;
341         INIT_LIST_HEAD(&ucontext->no_private_counters);
342
343         ret = ib_query_device(ib_dev, &dev_attr);
344         if (ret)
345                 goto err_free;
346         if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
347                 ucontext->invalidate_range = NULL;
348
349 #endif
350
351         resp.num_comp_vectors = file->device->num_comp_vectors;
352
353         ret = get_unused_fd_flags(O_CLOEXEC);
354         if (ret < 0)
355                 goto err_free;
356         resp.async_fd = ret;
357
358         filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
359         if (IS_ERR(filp)) {
360                 ret = PTR_ERR(filp);
361                 goto err_fd;
362         }
363
364         if (copy_to_user((void __user *) (unsigned long) cmd.response,
365                          &resp, sizeof resp)) {
366                 ret = -EFAULT;
367                 goto err_file;
368         }
369
370         file->ucontext = ucontext;
371
372         fd_install(resp.async_fd, filp);
373
374         mutex_unlock(&file->mutex);
375
376         return in_len;
377
378 err_file:
379         ib_uverbs_free_async_event_file(file);
380         fput(filp);
381
382 err_fd:
383         put_unused_fd(resp.async_fd);
384
385 err_free:
386         put_pid(ucontext->tgid);
387         ib_dev->dealloc_ucontext(ucontext);
388
389 err:
390         mutex_unlock(&file->mutex);
391         return ret;
392 }
393
394 static void copy_query_dev_fields(struct ib_uverbs_file *file,
395                                   struct ib_device *ib_dev,
396                                   struct ib_uverbs_query_device_resp *resp,
397                                   struct ib_device_attr *attr)
398 {
399         resp->fw_ver            = attr->fw_ver;
400         resp->node_guid         = ib_dev->node_guid;
401         resp->sys_image_guid    = attr->sys_image_guid;
402         resp->max_mr_size       = attr->max_mr_size;
403         resp->page_size_cap     = attr->page_size_cap;
404         resp->vendor_id         = attr->vendor_id;
405         resp->vendor_part_id    = attr->vendor_part_id;
406         resp->hw_ver            = attr->hw_ver;
407         resp->max_qp            = attr->max_qp;
408         resp->max_qp_wr         = attr->max_qp_wr;
409         resp->device_cap_flags  = attr->device_cap_flags;
410         resp->max_sge           = attr->max_sge;
411         resp->max_sge_rd        = attr->max_sge_rd;
412         resp->max_cq            = attr->max_cq;
413         resp->max_cqe           = attr->max_cqe;
414         resp->max_mr            = attr->max_mr;
415         resp->max_pd            = attr->max_pd;
416         resp->max_qp_rd_atom    = attr->max_qp_rd_atom;
417         resp->max_ee_rd_atom    = attr->max_ee_rd_atom;
418         resp->max_res_rd_atom   = attr->max_res_rd_atom;
419         resp->max_qp_init_rd_atom       = attr->max_qp_init_rd_atom;
420         resp->max_ee_init_rd_atom       = attr->max_ee_init_rd_atom;
421         resp->atomic_cap                = attr->atomic_cap;
422         resp->max_ee                    = attr->max_ee;
423         resp->max_rdd                   = attr->max_rdd;
424         resp->max_mw                    = attr->max_mw;
425         resp->max_raw_ipv6_qp           = attr->max_raw_ipv6_qp;
426         resp->max_raw_ethy_qp           = attr->max_raw_ethy_qp;
427         resp->max_mcast_grp             = attr->max_mcast_grp;
428         resp->max_mcast_qp_attach       = attr->max_mcast_qp_attach;
429         resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
430         resp->max_ah                    = attr->max_ah;
431         resp->max_fmr                   = attr->max_fmr;
432         resp->max_map_per_fmr           = attr->max_map_per_fmr;
433         resp->max_srq                   = attr->max_srq;
434         resp->max_srq_wr                = attr->max_srq_wr;
435         resp->max_srq_sge               = attr->max_srq_sge;
436         resp->max_pkeys                 = attr->max_pkeys;
437         resp->local_ca_ack_delay        = attr->local_ca_ack_delay;
438         resp->phys_port_cnt             = ib_dev->phys_port_cnt;
439 }
440
441 ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
442                                struct ib_device *ib_dev,
443                                const char __user *buf,
444                                int in_len, int out_len)
445 {
446         struct ib_uverbs_query_device      cmd;
447         struct ib_uverbs_query_device_resp resp;
448         struct ib_device_attr              attr;
449         int                                ret;
450
451         if (out_len < sizeof resp)
452                 return -ENOSPC;
453
454         if (copy_from_user(&cmd, buf, sizeof cmd))
455                 return -EFAULT;
456
457         ret = ib_query_device(ib_dev, &attr);
458         if (ret)
459                 return ret;
460
461         memset(&resp, 0, sizeof resp);
462         copy_query_dev_fields(file, ib_dev, &resp, &attr);
463
464         if (copy_to_user((void __user *) (unsigned long) cmd.response,
465                          &resp, sizeof resp))
466                 return -EFAULT;
467
468         return in_len;
469 }
470
471 ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
472                              struct ib_device *ib_dev,
473                              const char __user *buf,
474                              int in_len, int out_len)
475 {
476         struct ib_uverbs_query_port      cmd;
477         struct ib_uverbs_query_port_resp resp;
478         struct ib_port_attr              attr;
479         int                              ret;
480
481         if (out_len < sizeof resp)
482                 return -ENOSPC;
483
484         if (copy_from_user(&cmd, buf, sizeof cmd))
485                 return -EFAULT;
486
487         ret = ib_query_port(ib_dev, cmd.port_num, &attr);
488         if (ret)
489                 return ret;
490
491         memset(&resp, 0, sizeof resp);
492
493         resp.state           = attr.state;
494         resp.max_mtu         = attr.max_mtu;
495         resp.active_mtu      = attr.active_mtu;
496         resp.gid_tbl_len     = attr.gid_tbl_len;
497         resp.port_cap_flags  = attr.port_cap_flags;
498         resp.max_msg_sz      = attr.max_msg_sz;
499         resp.bad_pkey_cntr   = attr.bad_pkey_cntr;
500         resp.qkey_viol_cntr  = attr.qkey_viol_cntr;
501         resp.pkey_tbl_len    = attr.pkey_tbl_len;
502         resp.lid             = attr.lid;
503         resp.sm_lid          = attr.sm_lid;
504         resp.lmc             = attr.lmc;
505         resp.max_vl_num      = attr.max_vl_num;
506         resp.sm_sl           = attr.sm_sl;
507         resp.subnet_timeout  = attr.subnet_timeout;
508         resp.init_type_reply = attr.init_type_reply;
509         resp.active_width    = attr.active_width;
510         resp.active_speed    = attr.active_speed;
511         resp.phys_state      = attr.phys_state;
512         resp.link_layer      = rdma_port_get_link_layer(ib_dev,
513                                                         cmd.port_num);
514
515         if (copy_to_user((void __user *) (unsigned long) cmd.response,
516                          &resp, sizeof resp))
517                 return -EFAULT;
518
519         return in_len;
520 }
521
522 ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
523                            struct ib_device *ib_dev,
524                            const char __user *buf,
525                            int in_len, int out_len)
526 {
527         struct ib_uverbs_alloc_pd      cmd;
528         struct ib_uverbs_alloc_pd_resp resp;
529         struct ib_udata                udata;
530         struct ib_uobject             *uobj;
531         struct ib_pd                  *pd;
532         int                            ret;
533
534         if (out_len < sizeof resp)
535                 return -ENOSPC;
536
537         if (copy_from_user(&cmd, buf, sizeof cmd))
538                 return -EFAULT;
539
540         INIT_UDATA(&udata, buf + sizeof cmd,
541                    (unsigned long) cmd.response + sizeof resp,
542                    in_len - sizeof cmd, out_len - sizeof resp);
543
544         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
545         if (!uobj)
546                 return -ENOMEM;
547
548         init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
549         down_write(&uobj->mutex);
550
551         pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
552         if (IS_ERR(pd)) {
553                 ret = PTR_ERR(pd);
554                 goto err;
555         }
556
557         pd->device  = ib_dev;
558         pd->uobject = uobj;
559         pd->local_mr = NULL;
560         atomic_set(&pd->usecnt, 0);
561
562         uobj->object = pd;
563         ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
564         if (ret)
565                 goto err_idr;
566
567         memset(&resp, 0, sizeof resp);
568         resp.pd_handle = uobj->id;
569
570         if (copy_to_user((void __user *) (unsigned long) cmd.response,
571                          &resp, sizeof resp)) {
572                 ret = -EFAULT;
573                 goto err_copy;
574         }
575
576         mutex_lock(&file->mutex);
577         list_add_tail(&uobj->list, &file->ucontext->pd_list);
578         mutex_unlock(&file->mutex);
579
580         uobj->live = 1;
581
582         up_write(&uobj->mutex);
583
584         return in_len;
585
586 err_copy:
587         idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
588
589 err_idr:
590         ib_dealloc_pd(pd);
591
592 err:
593         put_uobj_write(uobj);
594         return ret;
595 }
596
597 ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
598                              struct ib_device *ib_dev,
599                              const char __user *buf,
600                              int in_len, int out_len)
601 {
602         struct ib_uverbs_dealloc_pd cmd;
603         struct ib_uobject          *uobj;
604         struct ib_pd               *pd;
605         int                         ret;
606
607         if (copy_from_user(&cmd, buf, sizeof cmd))
608                 return -EFAULT;
609
610         uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
611         if (!uobj)
612                 return -EINVAL;
613         pd = uobj->object;
614
615         if (atomic_read(&pd->usecnt)) {
616                 ret = -EBUSY;
617                 goto err_put;
618         }
619
620         ret = pd->device->dealloc_pd(uobj->object);
621         WARN_ONCE(ret, "InfiniBand HW driver failed dealloc_pd");
622         if (ret)
623                 goto err_put;
624
625         uobj->live = 0;
626         put_uobj_write(uobj);
627
628         idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
629
630         mutex_lock(&file->mutex);
631         list_del(&uobj->list);
632         mutex_unlock(&file->mutex);
633
634         put_uobj(uobj);
635
636         return in_len;
637
638 err_put:
639         put_uobj_write(uobj);
640         return ret;
641 }
642
643 struct xrcd_table_entry {
644         struct rb_node  node;
645         struct ib_xrcd *xrcd;
646         struct inode   *inode;
647 };
648
649 static int xrcd_table_insert(struct ib_uverbs_device *dev,
650                             struct inode *inode,
651                             struct ib_xrcd *xrcd)
652 {
653         struct xrcd_table_entry *entry, *scan;
654         struct rb_node **p = &dev->xrcd_tree.rb_node;
655         struct rb_node *parent = NULL;
656
657         entry = kmalloc(sizeof *entry, GFP_KERNEL);
658         if (!entry)
659                 return -ENOMEM;
660
661         entry->xrcd  = xrcd;
662         entry->inode = inode;
663
664         while (*p) {
665                 parent = *p;
666                 scan = rb_entry(parent, struct xrcd_table_entry, node);
667
668                 if (inode < scan->inode) {
669                         p = &(*p)->rb_left;
670                 } else if (inode > scan->inode) {
671                         p = &(*p)->rb_right;
672                 } else {
673                         kfree(entry);
674                         return -EEXIST;
675                 }
676         }
677
678         rb_link_node(&entry->node, parent, p);
679         rb_insert_color(&entry->node, &dev->xrcd_tree);
680         igrab(inode);
681         return 0;
682 }
683
684 static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev,
685                                                   struct inode *inode)
686 {
687         struct xrcd_table_entry *entry;
688         struct rb_node *p = dev->xrcd_tree.rb_node;
689
690         while (p) {
691                 entry = rb_entry(p, struct xrcd_table_entry, node);
692
693                 if (inode < entry->inode)
694                         p = p->rb_left;
695                 else if (inode > entry->inode)
696                         p = p->rb_right;
697                 else
698                         return entry;
699         }
700
701         return NULL;
702 }
703
704 static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode)
705 {
706         struct xrcd_table_entry *entry;
707
708         entry = xrcd_table_search(dev, inode);
709         if (!entry)
710                 return NULL;
711
712         return entry->xrcd;
713 }
714
715 static void xrcd_table_delete(struct ib_uverbs_device *dev,
716                               struct inode *inode)
717 {
718         struct xrcd_table_entry *entry;
719
720         entry = xrcd_table_search(dev, inode);
721         if (entry) {
722                 iput(inode);
723                 rb_erase(&entry->node, &dev->xrcd_tree);
724                 kfree(entry);
725         }
726 }
727
728 ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
729                             struct ib_device *ib_dev,
730                             const char __user *buf, int in_len,
731                             int out_len)
732 {
733         struct ib_uverbs_open_xrcd      cmd;
734         struct ib_uverbs_open_xrcd_resp resp;
735         struct ib_udata                 udata;
736         struct ib_uxrcd_object         *obj;
737         struct ib_xrcd                 *xrcd = NULL;
738         struct fd                       f = {NULL, 0};
739         struct inode                   *inode = NULL;
740         int                             ret = 0;
741         int                             new_xrcd = 0;
742
743         if (out_len < sizeof resp)
744                 return -ENOSPC;
745
746         if (copy_from_user(&cmd, buf, sizeof cmd))
747                 return -EFAULT;
748
749         INIT_UDATA(&udata, buf + sizeof cmd,
750                    (unsigned long) cmd.response + sizeof resp,
751                    in_len - sizeof cmd, out_len - sizeof resp);
752
753         mutex_lock(&file->device->xrcd_tree_mutex);
754
755         if (cmd.fd != -1) {
756                 /* search for file descriptor */
757                 f = fdget(cmd.fd);
758                 if (!f.file) {
759                         ret = -EBADF;
760                         goto err_tree_mutex_unlock;
761                 }
762
763                 inode = file_inode(f.file);
764                 xrcd = find_xrcd(file->device, inode);
765                 if (!xrcd && !(cmd.oflags & O_CREAT)) {
766                         /* no XRCD for this inode; O_CREAT is required to create one */
767                         ret = -EAGAIN;
768                         goto err_tree_mutex_unlock;
769                 }
770
771                 if (xrcd && cmd.oflags & O_EXCL) {
772                         ret = -EINVAL;
773                         goto err_tree_mutex_unlock;
774                 }
775         }
776
777         obj = kmalloc(sizeof *obj, GFP_KERNEL);
778         if (!obj) {
779                 ret = -ENOMEM;
780                 goto err_tree_mutex_unlock;
781         }
782
783         init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
784
785         down_write(&obj->uobject.mutex);
786
787         if (!xrcd) {
788                 xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
789                 if (IS_ERR(xrcd)) {
790                         ret = PTR_ERR(xrcd);
791                         goto err;
792                 }
793
794                 xrcd->inode   = inode;
795                 xrcd->device  = ib_dev;
796                 atomic_set(&xrcd->usecnt, 0);
797                 mutex_init(&xrcd->tgt_qp_mutex);
798                 INIT_LIST_HEAD(&xrcd->tgt_qp_list);
799                 new_xrcd = 1;
800         }
801
802         atomic_set(&obj->refcnt, 0);
803         obj->uobject.object = xrcd;
804         ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
805         if (ret)
806                 goto err_idr;
807
808         memset(&resp, 0, sizeof resp);
809         resp.xrcd_handle = obj->uobject.id;
810
811         if (inode) {
812                 if (new_xrcd) {
813                         /* create new inode/xrcd table entry */
814                         ret = xrcd_table_insert(file->device, inode, xrcd);
815                         if (ret)
816                                 goto err_insert_xrcd;
817                 }
818                 atomic_inc(&xrcd->usecnt);
819         }
820
821         if (copy_to_user((void __user *) (unsigned long) cmd.response,
822                          &resp, sizeof resp)) {
823                 ret = -EFAULT;
824                 goto err_copy;
825         }
826
827         if (f.file)
828                 fdput(f);
829
830         mutex_lock(&file->mutex);
831         list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
832         mutex_unlock(&file->mutex);
833
834         obj->uobject.live = 1;
835         up_write(&obj->uobject.mutex);
836
837         mutex_unlock(&file->device->xrcd_tree_mutex);
838         return in_len;
839
840 err_copy:
841         if (inode) {
842                 if (new_xrcd)
843                         xrcd_table_delete(file->device, inode);
844                 atomic_dec(&xrcd->usecnt);
845         }
846
847 err_insert_xrcd:
848         idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
849
850 err_idr:
851         ib_dealloc_xrcd(xrcd);
852
853 err:
854         put_uobj_write(&obj->uobject);
855
856 err_tree_mutex_unlock:
857         if (f.file)
858                 fdput(f);
859
860         mutex_unlock(&file->device->xrcd_tree_mutex);
861
862         return ret;
863 }
864
865 ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
866                              struct ib_device *ib_dev,
867                              const char __user *buf, int in_len,
868                              int out_len)
869 {
870         struct ib_uverbs_close_xrcd cmd;
871         struct ib_uobject           *uobj;
872         struct ib_xrcd              *xrcd = NULL;
873         struct inode                *inode = NULL;
874         struct ib_uxrcd_object      *obj;
875         int                         live;
876         int                         ret = 0;
877
878         if (copy_from_user(&cmd, buf, sizeof cmd))
879                 return -EFAULT;
880
881         mutex_lock(&file->device->xrcd_tree_mutex);
882         uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
883         if (!uobj) {
884                 ret = -EINVAL;
885                 goto out;
886         }
887
888         xrcd  = uobj->object;
889         inode = xrcd->inode;
890         obj   = container_of(uobj, struct ib_uxrcd_object, uobject);
891         if (atomic_read(&obj->refcnt)) {
892                 put_uobj_write(uobj);
893                 ret = -EBUSY;
894                 goto out;
895         }
896
897         if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
898                 ret = ib_dealloc_xrcd(uobj->object);
899                 if (!ret)
900                         uobj->live = 0;
901         }
902
903         live = uobj->live;
904         if (inode && ret)
905                 atomic_inc(&xrcd->usecnt);
906
907         put_uobj_write(uobj);
908
909         if (ret)
910                 goto out;
911
912         if (inode && !live)
913                 xrcd_table_delete(file->device, inode);
914
915         idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
916         mutex_lock(&file->mutex);
917         list_del(&uobj->list);
918         mutex_unlock(&file->mutex);
919
920         put_uobj(uobj);
921         ret = in_len;
922
923 out:
924         mutex_unlock(&file->device->xrcd_tree_mutex);
925         return ret;
926 }
927
928 void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
929                             struct ib_xrcd *xrcd)
930 {
931         struct inode *inode;
932
933         inode = xrcd->inode;
934         if (inode && !atomic_dec_and_test(&xrcd->usecnt))
935                 return;
936
937         ib_dealloc_xrcd(xrcd);
938
939         if (inode)
940                 xrcd_table_delete(dev, inode);
941 }
942
943 ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
944                          struct ib_device *ib_dev,
945                          const char __user *buf, int in_len,
946                          int out_len)
947 {
948         struct ib_uverbs_reg_mr      cmd;
949         struct ib_uverbs_reg_mr_resp resp;
950         struct ib_udata              udata;
951         struct ib_uobject           *uobj;
952         struct ib_pd                *pd;
953         struct ib_mr                *mr;
954         int                          ret;
955
956         if (out_len < sizeof resp)
957                 return -ENOSPC;
958
959         if (copy_from_user(&cmd, buf, sizeof cmd))
960                 return -EFAULT;
961
962         INIT_UDATA(&udata, buf + sizeof cmd,
963                    (unsigned long) cmd.response + sizeof resp,
964                    in_len - sizeof cmd, out_len - sizeof resp);
965
966         if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
967                 return -EINVAL;
968
969         ret = ib_check_mr_access(cmd.access_flags);
970         if (ret)
971                 return ret;
972
973         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
974         if (!uobj)
975                 return -ENOMEM;
976
977         init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
978         down_write(&uobj->mutex);
979
980         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
981         if (!pd) {
982                 ret = -EINVAL;
983                 goto err_free;
984         }
985
986         if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
987                 struct ib_device_attr attr;
988
989                 ret = ib_query_device(pd->device, &attr);
990                 if (ret || !(attr.device_cap_flags &
991                                 IB_DEVICE_ON_DEMAND_PAGING)) {
992                         pr_debug("ODP support not available\n");
993                         ret = -EINVAL;
994                         goto err_put;
995                 }
996         }
997
998         mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
999                                      cmd.access_flags, &udata);
1000         if (IS_ERR(mr)) {
1001                 ret = PTR_ERR(mr);
1002                 goto err_put;
1003         }
1004
1005         mr->device  = pd->device;
1006         mr->pd      = pd;
1007         mr->uobject = uobj;
1008         atomic_inc(&pd->usecnt);
1009         atomic_set(&mr->usecnt, 0);
1010
1011         uobj->object = mr;
1012         ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
1013         if (ret)
1014                 goto err_unreg;
1015
1016         memset(&resp, 0, sizeof resp);
1017         resp.lkey      = mr->lkey;
1018         resp.rkey      = mr->rkey;
1019         resp.mr_handle = uobj->id;
1020
1021         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1022                          &resp, sizeof resp)) {
1023                 ret = -EFAULT;
1024                 goto err_copy;
1025         }
1026
1027         put_pd_read(pd);
1028
1029         mutex_lock(&file->mutex);
1030         list_add_tail(&uobj->list, &file->ucontext->mr_list);
1031         mutex_unlock(&file->mutex);
1032
1033         uobj->live = 1;
1034
1035         up_write(&uobj->mutex);
1036
1037         return in_len;
1038
1039 err_copy:
1040         idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
1041
1042 err_unreg:
1043         ib_dereg_mr(mr);
1044
1045 err_put:
1046         put_pd_read(pd);
1047
1048 err_free:
1049         put_uobj_write(uobj);
1050         return ret;
1051 }
1052
1053 ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
1054                            struct ib_device *ib_dev,
1055                            const char __user *buf, int in_len,
1056                            int out_len)
1057 {
1058         struct ib_uverbs_rereg_mr      cmd;
1059         struct ib_uverbs_rereg_mr_resp resp;
1060         struct ib_udata              udata;
1061         struct ib_pd                *pd = NULL;
1062         struct ib_mr                *mr;
1063         struct ib_pd                *old_pd;
1064         int                          ret;
1065         struct ib_uobject           *uobj;
1066
1067         if (out_len < sizeof(resp))
1068                 return -ENOSPC;
1069
1070         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1071                 return -EFAULT;
1072
1073         INIT_UDATA(&udata, buf + sizeof(cmd),
1074                    (unsigned long) cmd.response + sizeof(resp),
1075                    in_len - sizeof(cmd), out_len - sizeof(resp));
1076
1077         if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
1078                 return -EINVAL;
1079
1080         if ((cmd.flags & IB_MR_REREG_TRANS) &&
1081             (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
1082              (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
1083                         return -EINVAL;
1084
1085         uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
1086                               file->ucontext);
1087
1088         if (!uobj)
1089                 return -EINVAL;
1090
1091         mr = uobj->object;
1092
1093         if (cmd.flags & IB_MR_REREG_ACCESS) {
1094                 ret = ib_check_mr_access(cmd.access_flags);
1095                 if (ret)
1096                         goto put_uobjs;
1097         }
1098
1099         if (cmd.flags & IB_MR_REREG_PD) {
1100                 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1101                 if (!pd) {
1102                         ret = -EINVAL;
1103                         goto put_uobjs;
1104                 }
1105         }
1106
1107         if (atomic_read(&mr->usecnt)) {
1108                 ret = -EBUSY;
1109                 goto put_uobj_pd;
1110         }
1111
1112         old_pd = mr->pd;
1113         ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
1114                                         cmd.length, cmd.hca_va,
1115                                         cmd.access_flags, pd, &udata);
1116         if (!ret) {
1117                 if (cmd.flags & IB_MR_REREG_PD) {
1118                         atomic_inc(&pd->usecnt);
1119                         mr->pd = pd;
1120                         atomic_dec(&old_pd->usecnt);
1121                 }
1122         } else {
1123                 goto put_uobj_pd;
1124         }
1125
1126         memset(&resp, 0, sizeof(resp));
1127         resp.lkey      = mr->lkey;
1128         resp.rkey      = mr->rkey;
1129
1130         if (copy_to_user((void __user *)(unsigned long)cmd.response,
1131                          &resp, sizeof(resp)))
1132                 ret = -EFAULT;
1133         else
1134                 ret = in_len;
1135
1136 put_uobj_pd:
1137         if (cmd.flags & IB_MR_REREG_PD)
1138                 put_pd_read(pd);
1139
1140 put_uobjs:
1141
1142         put_uobj_write(mr->uobject);
1143
1144         return ret;
1145 }
1146
1147 ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
1148                            struct ib_device *ib_dev,
1149                            const char __user *buf, int in_len,
1150                            int out_len)
1151 {
1152         struct ib_uverbs_dereg_mr cmd;
1153         struct ib_mr             *mr;
1154         struct ib_uobject        *uobj;
1155         int                       ret = -EINVAL;
1156
1157         if (copy_from_user(&cmd, buf, sizeof cmd))
1158                 return -EFAULT;
1159
1160         uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
1161         if (!uobj)
1162                 return -EINVAL;
1163
1164         mr = uobj->object;
1165
1166         ret = ib_dereg_mr(mr);
1167         if (!ret)
1168                 uobj->live = 0;
1169
1170         put_uobj_write(uobj);
1171
1172         if (ret)
1173                 return ret;
1174
1175         idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
1176
1177         mutex_lock(&file->mutex);
1178         list_del(&uobj->list);
1179         mutex_unlock(&file->mutex);
1180
1181         put_uobj(uobj);
1182
1183         return in_len;
1184 }
1185
1186 ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
1187                            struct ib_device *ib_dev,
1188                            const char __user *buf, int in_len,
1189                            int out_len)
1190 {
1191         struct ib_uverbs_alloc_mw      cmd;
1192         struct ib_uverbs_alloc_mw_resp resp;
1193         struct ib_uobject             *uobj;
1194         struct ib_pd                  *pd;
1195         struct ib_mw                  *mw;
1196         int                            ret;
1197
1198         if (out_len < sizeof(resp))
1199                 return -ENOSPC;
1200
1201         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1202                 return -EFAULT;
1203
1204         uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
1205         if (!uobj)
1206                 return -ENOMEM;
1207
1208         init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
1209         down_write(&uobj->mutex);
1210
1211         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1212         if (!pd) {
1213                 ret = -EINVAL;
1214                 goto err_free;
1215         }
1216
1217         mw = pd->device->alloc_mw(pd, cmd.mw_type);
1218         if (IS_ERR(mw)) {
1219                 ret = PTR_ERR(mw);
1220                 goto err_put;
1221         }
1222
1223         mw->device  = pd->device;
1224         mw->pd      = pd;
1225         mw->uobject = uobj;
1226         atomic_inc(&pd->usecnt);
1227
1228         uobj->object = mw;
1229         ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
1230         if (ret)
1231                 goto err_unalloc;
1232
1233         memset(&resp, 0, sizeof(resp));
1234         resp.rkey      = mw->rkey;
1235         resp.mw_handle = uobj->id;
1236
1237         if (copy_to_user((void __user *)(unsigned long)cmd.response,
1238                          &resp, sizeof(resp))) {
1239                 ret = -EFAULT;
1240                 goto err_copy;
1241         }
1242
1243         put_pd_read(pd);
1244
1245         mutex_lock(&file->mutex);
1246         list_add_tail(&uobj->list, &file->ucontext->mw_list);
1247         mutex_unlock(&file->mutex);
1248
1249         uobj->live = 1;
1250
1251         up_write(&uobj->mutex);
1252
1253         return in_len;
1254
1255 err_copy:
1256         idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
1257
1258 err_unalloc:
1259         ib_dealloc_mw(mw);
1260
1261 err_put:
1262         put_pd_read(pd);
1263
1264 err_free:
1265         put_uobj_write(uobj);
1266         return ret;
1267 }
1268
1269 ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
1270                              struct ib_device *ib_dev,
1271                              const char __user *buf, int in_len,
1272                              int out_len)
1273 {
1274         struct ib_uverbs_dealloc_mw cmd;
1275         struct ib_mw               *mw;
1276         struct ib_uobject          *uobj;
1277         int                         ret = -EINVAL;
1278
1279         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1280                 return -EFAULT;
1281
1282         uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
1283         if (!uobj)
1284                 return -EINVAL;
1285
1286         mw = uobj->object;
1287
1288         ret = ib_dealloc_mw(mw);
1289         if (!ret)
1290                 uobj->live = 0;
1291
1292         put_uobj_write(uobj);
1293
1294         if (ret)
1295                 return ret;
1296
1297         idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
1298
1299         mutex_lock(&file->mutex);
1300         list_del(&uobj->list);
1301         mutex_unlock(&file->mutex);
1302
1303         put_uobj(uobj);
1304
1305         return in_len;
1306 }
1307
1308 ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
1309                                       struct ib_device *ib_dev,
1310                                       const char __user *buf, int in_len,
1311                                       int out_len)
1312 {
1313         struct ib_uverbs_create_comp_channel       cmd;
1314         struct ib_uverbs_create_comp_channel_resp  resp;
1315         struct file                               *filp;
1316         int ret;
1317
1318         if (out_len < sizeof resp)
1319                 return -ENOSPC;
1320
1321         if (copy_from_user(&cmd, buf, sizeof cmd))
1322                 return -EFAULT;
1323
1324         ret = get_unused_fd_flags(O_CLOEXEC);
1325         if (ret < 0)
1326                 return ret;
1327         resp.fd = ret;
1328
1329         filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
1330         if (IS_ERR(filp)) {
1331                 put_unused_fd(resp.fd);
1332                 return PTR_ERR(filp);
1333         }
1334
1335         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1336                          &resp, sizeof resp)) {
1337                 put_unused_fd(resp.fd);
1338                 fput(filp);
1339                 return -EFAULT;
1340         }
1341
1342         fd_install(resp.fd, filp);
1343         return in_len;
1344 }
1345
1346 static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
1347                                         struct ib_device *ib_dev,
1348                                        struct ib_udata *ucore,
1349                                        struct ib_udata *uhw,
1350                                        struct ib_uverbs_ex_create_cq *cmd,
1351                                        size_t cmd_sz,
1352                                        int (*cb)(struct ib_uverbs_file *file,
1353                                                  struct ib_ucq_object *obj,
1354                                                  struct ib_uverbs_ex_create_cq_resp *resp,
1355                                                  struct ib_udata *udata,
1356                                                  void *context),
1357                                        void *context)
1358 {
1359         struct ib_ucq_object           *obj;
1360         struct ib_uverbs_event_file    *ev_file = NULL;
1361         struct ib_cq                   *cq;
1362         int                             ret;
1363         struct ib_uverbs_ex_create_cq_resp resp;
1364         struct ib_cq_init_attr attr = {};
1365
1366         if (cmd->comp_vector >= file->device->num_comp_vectors)
1367                 return ERR_PTR(-EINVAL);
1368
1369         obj = kmalloc(sizeof *obj, GFP_KERNEL);
1370         if (!obj)
1371                 return ERR_PTR(-ENOMEM);
1372
1373         init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class);
1374         down_write(&obj->uobject.mutex);
1375
1376         if (cmd->comp_channel >= 0) {
1377                 ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
1378                 if (!ev_file) {
1379                         ret = -EINVAL;
1380                         goto err;
1381                 }
1382         }
1383
1384         obj->uverbs_file           = file;
1385         obj->comp_events_reported  = 0;
1386         obj->async_events_reported = 0;
1387         INIT_LIST_HEAD(&obj->comp_list);
1388         INIT_LIST_HEAD(&obj->async_list);
1389
1390         attr.cqe = cmd->cqe;
1391         attr.comp_vector = cmd->comp_vector;
1392
1393         if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
1394                 attr.flags = cmd->flags;
1395
1396         cq = ib_dev->create_cq(ib_dev, &attr,
1397                                file->ucontext, uhw);
1398         if (IS_ERR(cq)) {
1399                 ret = PTR_ERR(cq);
1400                 goto err_file;
1401         }
1402
1403         cq->device        = ib_dev;
1404         cq->uobject       = &obj->uobject;
1405         cq->comp_handler  = ib_uverbs_comp_handler;
1406         cq->event_handler = ib_uverbs_cq_event_handler;
1407         cq->cq_context    = ev_file;
1408         atomic_set(&cq->usecnt, 0);
1409
1410         obj->uobject.object = cq;
1411         ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
1412         if (ret)
1413                 goto err_free;
1414
1415         memset(&resp, 0, sizeof resp);
1416         resp.base.cq_handle = obj->uobject.id;
1417         resp.base.cqe       = cq->cqe;
1418
1419         resp.response_length = offsetof(typeof(resp), response_length) +
1420                 sizeof(resp.response_length);
1421
1422         ret = cb(file, obj, &resp, ucore, context);
1423         if (ret)
1424                 goto err_cb;
1425
1426         mutex_lock(&file->mutex);
1427         list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
1428         mutex_unlock(&file->mutex);
1429
1430         obj->uobject.live = 1;
1431
1432         up_write(&obj->uobject.mutex);
1433
1434         return obj;
1435
1436 err_cb:
1437         idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
1438
1439 err_free:
1440         ib_destroy_cq(cq);
1441
1442 err_file:
1443         if (ev_file)
1444                 ib_uverbs_release_ucq(file, ev_file, obj);
1445
1446 err:
1447         put_uobj_write(&obj->uobject);
1448
1449         return ERR_PTR(ret);
1450 }
1451
1452 static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
1453                                   struct ib_ucq_object *obj,
1454                                   struct ib_uverbs_ex_create_cq_resp *resp,
1455                                   struct ib_udata *ucore, void *context)
1456 {
1457         if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
1458                 return -EFAULT;
1459
1460         return 0;
1461 }
1462
1463 ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
1464                             struct ib_device *ib_dev,
1465                             const char __user *buf, int in_len,
1466                             int out_len)
1467 {
1468         struct ib_uverbs_create_cq      cmd;
1469         struct ib_uverbs_ex_create_cq   cmd_ex;
1470         struct ib_uverbs_create_cq_resp resp;
1471         struct ib_udata                 ucore;
1472         struct ib_udata                 uhw;
1473         struct ib_ucq_object           *obj;
1474
1475         if (out_len < sizeof(resp))
1476                 return -ENOSPC;
1477
1478         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1479                 return -EFAULT;
1480
1481         INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp));
1482
1483         INIT_UDATA(&uhw, buf + sizeof(cmd),
1484                    (unsigned long)cmd.response + sizeof(resp),
1485                    in_len - sizeof(cmd), out_len - sizeof(resp));
1486
1487         memset(&cmd_ex, 0, sizeof(cmd_ex));
1488         cmd_ex.user_handle = cmd.user_handle;
1489         cmd_ex.cqe = cmd.cqe;
1490         cmd_ex.comp_vector = cmd.comp_vector;
1491         cmd_ex.comp_channel = cmd.comp_channel;
1492
1493         obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex,
1494                         offsetof(typeof(cmd_ex), comp_channel) +
1495                         sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
1496                         NULL);
1497
1498         if (IS_ERR(obj))
1499                 return PTR_ERR(obj);
1500
1501         return in_len;
1502 }
1503
1504 static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
1505                                      struct ib_ucq_object *obj,
1506                                      struct ib_uverbs_ex_create_cq_resp *resp,
1507                                      struct ib_udata *ucore, void *context)
1508 {
1509         if (ib_copy_to_udata(ucore, resp, resp->response_length))
1510                 return -EFAULT;
1511
1512         return 0;
1513 }
1514
1515 int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
1516                            struct ib_device *ib_dev,
1517                            struct ib_udata *ucore,
1518                            struct ib_udata *uhw)
1519 {
1520         struct ib_uverbs_ex_create_cq_resp resp;
1521         struct ib_uverbs_ex_create_cq  cmd;
1522         struct ib_ucq_object           *obj;
1523         int err;
1524
1525         if (ucore->inlen < sizeof(cmd))
1526                 return -EINVAL;
1527
1528         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
1529         if (err)
1530                 return err;
1531
1532         if (cmd.comp_mask)
1533                 return -EINVAL;
1534
1535         if (cmd.reserved)
1536                 return -EINVAL;
1537
1538         if (ucore->outlen < (offsetof(typeof(resp), response_length) +
1539                              sizeof(resp.response_length)))
1540                 return -ENOSPC;
1541
1542         obj = create_cq(file, ib_dev, ucore, uhw, &cmd,
1543                         min(ucore->inlen, sizeof(cmd)),
1544                         ib_uverbs_ex_create_cq_cb, NULL);
1545
1546         if (IS_ERR(obj))
1547                 return PTR_ERR(obj);
1548
1549         return 0;
1550 }
1551
1552 ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
1553                             struct ib_device *ib_dev,
1554                             const char __user *buf, int in_len,
1555                             int out_len)
1556 {
1557         struct ib_uverbs_resize_cq      cmd;
1558         struct ib_uverbs_resize_cq_resp resp;
1559         struct ib_udata                 udata;
1560         struct ib_cq                    *cq;
1561         int                             ret = -EINVAL;
1562
1563         if (copy_from_user(&cmd, buf, sizeof cmd))
1564                 return -EFAULT;
1565
1566         INIT_UDATA(&udata, buf + sizeof cmd,
1567                    (unsigned long) cmd.response + sizeof resp,
1568                    in_len - sizeof cmd, out_len - sizeof resp);
1569
1570         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1571         if (!cq)
1572                 return -EINVAL;
1573
1574         ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
1575         if (ret)
1576                 goto out;
1577
1578         resp.cqe = cq->cqe;
1579
1580         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1581                          &resp, sizeof resp.cqe))
1582                 ret = -EFAULT;
1583
1584 out:
1585         put_cq_read(cq);
1586
1587         return ret ? ret : in_len;
1588 }
1589
1590 static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
1591 {
1592         struct ib_uverbs_wc tmp;
1593
1594         tmp.wr_id               = wc->wr_id;
1595         tmp.status              = wc->status;
1596         tmp.opcode              = wc->opcode;
1597         tmp.vendor_err          = wc->vendor_err;
1598         tmp.byte_len            = wc->byte_len;
1599         tmp.ex.imm_data         = (__u32 __force) wc->ex.imm_data;
1600         tmp.qp_num              = wc->qp->qp_num;
1601         tmp.src_qp              = wc->src_qp;
1602         tmp.wc_flags            = wc->wc_flags;
1603         tmp.pkey_index          = wc->pkey_index;
1604         tmp.slid                = wc->slid;
1605         tmp.sl                  = wc->sl;
1606         tmp.dlid_path_bits      = wc->dlid_path_bits;
1607         tmp.port_num            = wc->port_num;
1608         tmp.reserved            = 0;
1609
1610         if (copy_to_user(dest, &tmp, sizeof tmp))
1611                 return -EFAULT;
1612
1613         return 0;
1614 }
1615
1616 ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
1617                           struct ib_device *ib_dev,
1618                           const char __user *buf, int in_len,
1619                           int out_len)
1620 {
1621         struct ib_uverbs_poll_cq       cmd;
1622         struct ib_uverbs_poll_cq_resp  resp;
1623         u8 __user                     *header_ptr;
1624         u8 __user                     *data_ptr;
1625         struct ib_cq                  *cq;
1626         struct ib_wc                   wc;
1627         int                            ret;
1628
1629         if (copy_from_user(&cmd, buf, sizeof cmd))
1630                 return -EFAULT;
1631
1632         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1633         if (!cq)
1634                 return -EINVAL;
1635
1636         /* we copy a struct ib_uverbs_poll_cq_resp to user space */
1637         header_ptr = (void __user *)(unsigned long) cmd.response;
1638         data_ptr = header_ptr + sizeof resp;
1639
1640         memset(&resp, 0, sizeof resp);
1641         while (resp.count < cmd.ne) {
1642                 ret = ib_poll_cq(cq, 1, &wc);
1643                 if (ret < 0)
1644                         goto out_put;
1645                 if (!ret)
1646                         break;
1647
1648                 ret = copy_wc_to_user(data_ptr, &wc);
1649                 if (ret)
1650                         goto out_put;
1651
1652                 data_ptr += sizeof(struct ib_uverbs_wc);
1653                 ++resp.count;
1654         }
1655
1656         if (copy_to_user(header_ptr, &resp, sizeof resp)) {
1657                 ret = -EFAULT;
1658                 goto out_put;
1659         }
1660
1661         ret = in_len;
1662
1663 out_put:
1664         put_cq_read(cq);
1665         return ret;
1666 }
1667
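/*
 * REQ_NOTIFY_CQ: re-arm the completion queue, either for solicited
 * completions only or for the next completion of any kind.
 */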
1668 ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
1669                                 struct ib_device *ib_dev,
1670                                 const char __user *buf, int in_len,
1671                                 int out_len)
1672 {
1673         struct ib_uverbs_req_notify_cq cmd;
1674         struct ib_cq                  *cq;
1675
1676         if (copy_from_user(&cmd, buf, sizeof cmd))
1677                 return -EFAULT;
1678
1679         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1680         if (!cq)
1681                 return -EINVAL;
1682
1683         ib_req_notify_cq(cq, cmd.solicited_only ?
1684                          IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
1685
1686         put_cq_read(cq);
1687
1688         return in_len;
1689 }
1690
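/*
 * DESTROY_CQ: destroy the CQ, drop it from the idr and the per-file list,
 * and report how many completion and async events were delivered on it.
 */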
1691 ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
1692                              struct ib_device *ib_dev,
1693                              const char __user *buf, int in_len,
1694                              int out_len)
1695 {
1696         struct ib_uverbs_destroy_cq      cmd;
1697         struct ib_uverbs_destroy_cq_resp resp;
1698         struct ib_uobject               *uobj;
1699         struct ib_cq                    *cq;
1700         struct ib_ucq_object            *obj;
1701         struct ib_uverbs_event_file     *ev_file;
1702         int                              ret = -EINVAL;
1703
1704         if (copy_from_user(&cmd, buf, sizeof cmd))
1705                 return -EFAULT;
1706
1707         uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
1708         if (!uobj)
1709                 return -EINVAL;
1710         cq      = uobj->object;
1711         ev_file = cq->cq_context;
1712         obj     = container_of(cq->uobject, struct ib_ucq_object, uobject);
1713
1714         ret = ib_destroy_cq(cq);
1715         if (!ret)
1716                 uobj->live = 0;
1717
1718         put_uobj_write(uobj);
1719
1720         if (ret)
1721                 return ret;
1722
1723         idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
1724
1725         mutex_lock(&file->mutex);
1726         list_del(&uobj->list);
1727         mutex_unlock(&file->mutex);
1728
1729         ib_uverbs_release_ucq(file, ev_file, obj);
1730
1731         memset(&resp, 0, sizeof resp);
1732         resp.comp_events_reported  = obj->comp_events_reported;
1733         resp.async_events_reported = obj->async_events_reported;
1734
1735         put_uobj(uobj);
1736
1737         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1738                          &resp, sizeof resp))
1739                 return -EFAULT;
1740
1741         return in_len;
1742 }
1743
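/*
 * Common QP-creation helper shared by the legacy and extended create-QP
 * paths.  cmd_sz is the number of command bytes the caller actually
 * provided; optional fields such as create_flags are honoured only when
 * cmd_sz covers them.  The cb callback writes the response in whichever
 * layout the caller's ABI expects.
 */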
1744 static int create_qp(struct ib_uverbs_file *file,
1745                      struct ib_udata *ucore,
1746                      struct ib_udata *uhw,
1747                      struct ib_uverbs_ex_create_qp *cmd,
1748                      size_t cmd_sz,
1749                      int (*cb)(struct ib_uverbs_file *file,
1750                                struct ib_uverbs_ex_create_qp_resp *resp,
1751                                struct ib_udata *udata),
1752                      void *context)
1753 {
1754         struct ib_uqp_object            *obj;
1755         struct ib_device                *device;
1756         struct ib_pd                    *pd = NULL;
1757         struct ib_xrcd                  *xrcd = NULL;
1758         struct ib_uobject               *uninitialized_var(xrcd_uobj);
1759         struct ib_cq                    *scq = NULL, *rcq = NULL;
1760         struct ib_srq                   *srq = NULL;
1761         struct ib_qp                    *qp;
1762         char                            *buf;
1763         struct ib_qp_init_attr          attr;
1764         struct ib_uverbs_ex_create_qp_resp resp;
1765         int                             ret;
1766
1767         if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
1768                 return -EPERM;
1769
1770         obj = kzalloc(sizeof *obj, GFP_KERNEL);
1771         if (!obj)
1772                 return -ENOMEM;
1773
1774         init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
1775                   &qp_lock_class);
1776         down_write(&obj->uevent.uobject.mutex);
1777
1778         if (cmd->qp_type == IB_QPT_XRC_TGT) {
1779                 xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
1780                                      &xrcd_uobj);
1781                 if (!xrcd) {
1782                         ret = -EINVAL;
1783                         goto err_put;
1784                 }
1785                 device = xrcd->device;
1786         } else {
1787                 if (cmd->qp_type == IB_QPT_XRC_INI) {
1788                         cmd->max_recv_wr = 0;
1789                         cmd->max_recv_sge = 0;
1790                 } else {
1791                         if (cmd->is_srq) {
1792                                 srq = idr_read_srq(cmd->srq_handle,
1793                                                    file->ucontext);
1794                                 if (!srq || srq->srq_type != IB_SRQT_BASIC) {
1795                                         ret = -EINVAL;
1796                                         goto err_put;
1797                                 }
1798                         }
1799
1800                         if (cmd->recv_cq_handle != cmd->send_cq_handle) {
1801                                 rcq = idr_read_cq(cmd->recv_cq_handle,
1802                                                   file->ucontext, 0);
1803                                 if (!rcq) {
1804                                         ret = -EINVAL;
1805                                         goto err_put;
1806                                 }
1807                         }
1808                 }
1809
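                /* look up the send CQ; without a separate recv CQ it serves both */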
1810                 scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
1811                 rcq = rcq ?: scq;
1812                 pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
1813                 if (!pd || !scq) {
1814                         ret = -EINVAL;
1815                         goto err_put;
1816                 }
1817
1818                 device = pd->device;
1819         }
1820
1821         attr.event_handler = ib_uverbs_qp_event_handler;
1822         attr.qp_context    = file;
1823         attr.send_cq       = scq;
1824         attr.recv_cq       = rcq;
1825         attr.srq           = srq;
1826         attr.xrcd          = xrcd;
1827         attr.sq_sig_type   = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
1828                                               IB_SIGNAL_REQ_WR;
1829         attr.qp_type       = cmd->qp_type;
1830         attr.create_flags  = 0;
1831
1832         attr.cap.max_send_wr     = cmd->max_send_wr;
1833         attr.cap.max_recv_wr     = cmd->max_recv_wr;
1834         attr.cap.max_send_sge    = cmd->max_send_sge;
1835         attr.cap.max_recv_sge    = cmd->max_recv_sge;
1836         attr.cap.max_inline_data = cmd->max_inline_data;
1837
1838         obj->uevent.events_reported     = 0;
1839         INIT_LIST_HEAD(&obj->uevent.event_list);
1840         INIT_LIST_HEAD(&obj->mcast_list);
1841
1842         if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
1843                       sizeof(cmd->create_flags))
1844                 attr.create_flags = cmd->create_flags;
1845
1846         if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
1847                 ret = -EINVAL;
1848                 goto err_put;
1849         }
1850
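        /* reject any non-zero command bytes beyond the fields we understand */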
1851         buf = (void *)cmd + sizeof(*cmd);
1852         if (cmd_sz > sizeof(*cmd))
1853                 if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
1854                                              cmd_sz - sizeof(*cmd) - 1))) {
1855                         ret = -EINVAL;
1856                         goto err_put;
1857                 }
1858
1859         if (cmd->qp_type == IB_QPT_XRC_TGT)
1860                 qp = ib_create_qp(pd, &attr);
1861         else
1862                 qp = device->create_qp(pd, &attr, uhw);
1863
1864         if (IS_ERR(qp)) {
1865                 ret = PTR_ERR(qp);
1866                 goto err_put;
1867         }
1868
1869         if (cmd->qp_type != IB_QPT_XRC_TGT) {
1870                 qp->real_qp       = qp;
1871                 qp->device        = device;
1872                 qp->pd            = pd;
1873                 qp->send_cq       = attr.send_cq;
1874                 qp->recv_cq       = attr.recv_cq;
1875                 qp->srq           = attr.srq;
1876                 qp->event_handler = attr.event_handler;
1877                 qp->qp_context    = attr.qp_context;
1878                 qp->qp_type       = attr.qp_type;
1879                 atomic_set(&qp->usecnt, 0);
1880                 atomic_inc(&pd->usecnt);
1881                 atomic_inc(&attr.send_cq->usecnt);
1882                 if (attr.recv_cq)
1883                         atomic_inc(&attr.recv_cq->usecnt);
1884                 if (attr.srq)
1885                         atomic_inc(&attr.srq->usecnt);
1886         }
1887         qp->uobject = &obj->uevent.uobject;
1888
1889         obj->uevent.uobject.object = qp;
1890         ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
1891         if (ret)
1892                 goto err_destroy;
1893
1894         memset(&resp, 0, sizeof resp);
1895         resp.base.qpn             = qp->qp_num;
1896         resp.base.qp_handle       = obj->uevent.uobject.id;
1897         resp.base.max_recv_sge    = attr.cap.max_recv_sge;
1898         resp.base.max_send_sge    = attr.cap.max_send_sge;
1899         resp.base.max_recv_wr     = attr.cap.max_recv_wr;
1900         resp.base.max_send_wr     = attr.cap.max_send_wr;
1901         resp.base.max_inline_data = attr.cap.max_inline_data;
1902
1903         resp.response_length = offsetof(typeof(resp), response_length) +
1904                                sizeof(resp.response_length);
1905
1906         ret = cb(file, &resp, ucore);
1907         if (ret)
1908                 goto err_cb;
1909
1910         if (xrcd) {
1911                 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
1912                                           uobject);
1913                 atomic_inc(&obj->uxrcd->refcnt);
1914                 put_xrcd_read(xrcd_uobj);
1915         }
1916
1917         if (pd)
1918                 put_pd_read(pd);
1919         if (scq)
1920                 put_cq_read(scq);
1921         if (rcq && rcq != scq)
1922                 put_cq_read(rcq);
1923         if (srq)
1924                 put_srq_read(srq);
1925
1926         mutex_lock(&file->mutex);
1927         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
1928         mutex_unlock(&file->mutex);
1929
1930         obj->uevent.uobject.live = 1;
1931
1932         up_write(&obj->uevent.uobject.mutex);
1933
1934         return 0;
1935 err_cb:
1936         idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
1937
1938 err_destroy:
1939         ib_destroy_qp(qp);
1940
1941 err_put:
1942         if (xrcd)
1943                 put_xrcd_read(xrcd_uobj);
1944         if (pd)
1945                 put_pd_read(pd);
1946         if (scq)
1947                 put_cq_read(scq);
1948         if (rcq && rcq != scq)
1949                 put_cq_read(rcq);
1950         if (srq)
1951                 put_srq_read(srq);
1952
1953         put_uobj_write(&obj->uevent.uobject);
1954         return ret;
1955 }
1956
1957 static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
1958                                   struct ib_uverbs_ex_create_qp_resp *resp,
1959                                   struct ib_udata *ucore)
1960 {
1961         if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
1962                 return -EFAULT;
1963
1964         return 0;
1965 }
1966
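/*
 * Legacy CREATE_QP entry point: translate the old command layout into
 * struct ib_uverbs_ex_create_qp and reuse the common create_qp() helper,
 * writing back only the base response.
 */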
1967 ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1968                             struct ib_device *ib_dev,
1969                             const char __user *buf, int in_len,
1970                             int out_len)
1971 {
1972         struct ib_uverbs_create_qp      cmd;
1973         struct ib_uverbs_ex_create_qp   cmd_ex;
1974         struct ib_udata                 ucore;
1975         struct ib_udata                 uhw;
1976         ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
1977         int                             err;
1978
1979         if (out_len < resp_size)
1980                 return -ENOSPC;
1981
1982         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1983                 return -EFAULT;
1984
1985         INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
1986                    resp_size);
1987         INIT_UDATA(&uhw, buf + sizeof(cmd),
1988                    (unsigned long)cmd.response + resp_size,
1989                    in_len - sizeof(cmd), out_len - resp_size);
1990
1991         memset(&cmd_ex, 0, sizeof(cmd_ex));
1992         cmd_ex.user_handle = cmd.user_handle;
1993         cmd_ex.pd_handle = cmd.pd_handle;
1994         cmd_ex.send_cq_handle = cmd.send_cq_handle;
1995         cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
1996         cmd_ex.srq_handle = cmd.srq_handle;
1997         cmd_ex.max_send_wr = cmd.max_send_wr;
1998         cmd_ex.max_recv_wr = cmd.max_recv_wr;
1999         cmd_ex.max_send_sge = cmd.max_send_sge;
2000         cmd_ex.max_recv_sge = cmd.max_recv_sge;
2001         cmd_ex.max_inline_data = cmd.max_inline_data;
2002         cmd_ex.sq_sig_all = cmd.sq_sig_all;
2003         cmd_ex.qp_type = cmd.qp_type;
2004         cmd_ex.is_srq = cmd.is_srq;
2005
2006         err = create_qp(file, &ucore, &uhw, &cmd_ex,
2007                         offsetof(typeof(cmd_ex), is_srq) +
2008                         sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
2009                         NULL);
2010
2011         if (err)
2012                 return err;
2013
2014         return in_len;
2015 }
2016
2017 static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
2018                                      struct ib_uverbs_ex_create_qp_resp *resp,
2019                                      struct ib_udata *ucore)
2020 {
2021         if (ib_copy_to_udata(ucore, resp, resp->response_length))
2022                 return -EFAULT;
2023
2024         return 0;
2025 }
2026
2027 int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
2028                            struct ib_device *ib_dev,
2029                            struct ib_udata *ucore,
2030                            struct ib_udata *uhw)
2031 {
2032         struct ib_uverbs_ex_create_qp_resp resp;
2033         struct ib_uverbs_ex_create_qp cmd = {0};
2034         int err;
2035
2036         if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
2037                             sizeof(cmd.comp_mask)))
2038                 return -EINVAL;
2039
2040         err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
2041         if (err)
2042                 return err;
2043
2044         if (cmd.comp_mask)
2045                 return -EINVAL;
2046
2047         if (cmd.reserved)
2048                 return -EINVAL;
2049
2050         if (ucore->outlen < (offsetof(typeof(resp), response_length) +
2051                              sizeof(resp.response_length)))
2052                 return -ENOSPC;
2053
2054         err = create_qp(file, ucore, uhw, &cmd,
2055                         min(ucore->inlen, sizeof(cmd)),
2056                         ib_uverbs_ex_create_qp_cb, NULL);
2057
2058         if (err)
2059                 return err;
2060
2061         return 0;
2062 }
2063
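/*
 * OPEN_QP: obtain a reference to an existing shareable QP (an XRC target QP)
 * through its XRCD rather than creating a new one.
 */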
2064 ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
2065                           struct ib_device *ib_dev,
2066                           const char __user *buf, int in_len, int out_len)
2067 {
2068         struct ib_uverbs_open_qp        cmd;
2069         struct ib_uverbs_create_qp_resp resp;
2070         struct ib_udata                 udata;
2071         struct ib_uqp_object           *obj;
2072         struct ib_xrcd                 *xrcd;
2073         struct ib_uobject              *uninitialized_var(xrcd_uobj);
2074         struct ib_qp                   *qp;
2075         struct ib_qp_open_attr          attr;
2076         int ret;
2077
2078         if (out_len < sizeof resp)
2079                 return -ENOSPC;
2080
2081         if (copy_from_user(&cmd, buf, sizeof cmd))
2082                 return -EFAULT;
2083
2084         INIT_UDATA(&udata, buf + sizeof cmd,
2085                    (unsigned long) cmd.response + sizeof resp,
2086                    in_len - sizeof cmd, out_len - sizeof resp);
2087
2088         obj = kmalloc(sizeof *obj, GFP_KERNEL);
2089         if (!obj)
2090                 return -ENOMEM;
2091
2092         init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
2093         down_write(&obj->uevent.uobject.mutex);
2094
2095         xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
2096         if (!xrcd) {
2097                 ret = -EINVAL;
2098                 goto err_put;
2099         }
2100
2101         attr.event_handler = ib_uverbs_qp_event_handler;
2102         attr.qp_context    = file;
2103         attr.qp_num        = cmd.qpn;
2104         attr.qp_type       = cmd.qp_type;
2105
2106         obj->uevent.events_reported = 0;
2107         INIT_LIST_HEAD(&obj->uevent.event_list);
2108         INIT_LIST_HEAD(&obj->mcast_list);
2109
2110         qp = ib_open_qp(xrcd, &attr);
2111         if (IS_ERR(qp)) {
2112                 ret = PTR_ERR(qp);
2113                 goto err_put;
2114         }
2115
2116         qp->uobject = &obj->uevent.uobject;
2117
2118         obj->uevent.uobject.object = qp;
2119         ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
2120         if (ret)
2121                 goto err_destroy;
2122
2123         memset(&resp, 0, sizeof resp);
2124         resp.qpn       = qp->qp_num;
2125         resp.qp_handle = obj->uevent.uobject.id;
2126
2127         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2128                          &resp, sizeof resp)) {
2129                 ret = -EFAULT;
2130                 goto err_remove;
2131         }
2132
2133         obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
2134         atomic_inc(&obj->uxrcd->refcnt);
2135         put_xrcd_read(xrcd_uobj);
2136
2137         mutex_lock(&file->mutex);
2138         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
2139         mutex_unlock(&file->mutex);
2140
2141         obj->uevent.uobject.live = 1;
2142
2143         up_write(&obj->uevent.uobject.mutex);
2144
2145         return in_len;
2146
2147 err_remove:
2148         idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
2149
2150 err_destroy:
2151         ib_destroy_qp(qp);
2152
2153 err_put:
2154         put_xrcd_read(xrcd_uobj);
2155         put_uobj_write(&obj->uevent.uobject);
2156         return ret;
2157 }
2158
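/*
 * QUERY_QP: query the QP's current attributes and init attributes and
 * marshal them into the user-visible response layout.
 */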
2159 ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
2160                            struct ib_device *ib_dev,
2161                            const char __user *buf, int in_len,
2162                            int out_len)
2163 {
2164         struct ib_uverbs_query_qp      cmd;
2165         struct ib_uverbs_query_qp_resp resp;
2166         struct ib_qp                   *qp;
2167         struct ib_qp_attr              *attr;
2168         struct ib_qp_init_attr         *init_attr;
2169         int                            ret;
2170
2171         if (copy_from_user(&cmd, buf, sizeof cmd))
2172                 return -EFAULT;
2173
2174         attr      = kmalloc(sizeof *attr, GFP_KERNEL);
2175         init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
2176         if (!attr || !init_attr) {
2177                 ret = -ENOMEM;
2178                 goto out;
2179         }
2180
2181         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2182         if (!qp) {
2183                 ret = -EINVAL;
2184                 goto out;
2185         }
2186
2187         ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
2188
2189         put_qp_read(qp);
2190
2191         if (ret)
2192                 goto out;
2193
2194         memset(&resp, 0, sizeof resp);
2195
2196         resp.qp_state               = attr->qp_state;
2197         resp.cur_qp_state           = attr->cur_qp_state;
2198         resp.path_mtu               = attr->path_mtu;
2199         resp.path_mig_state         = attr->path_mig_state;
2200         resp.qkey                   = attr->qkey;
2201         resp.rq_psn                 = attr->rq_psn;
2202         resp.sq_psn                 = attr->sq_psn;
2203         resp.dest_qp_num            = attr->dest_qp_num;
2204         resp.qp_access_flags        = attr->qp_access_flags;
2205         resp.pkey_index             = attr->pkey_index;
2206         resp.alt_pkey_index         = attr->alt_pkey_index;
2207         resp.sq_draining            = attr->sq_draining;
2208         resp.max_rd_atomic          = attr->max_rd_atomic;
2209         resp.max_dest_rd_atomic     = attr->max_dest_rd_atomic;
2210         resp.min_rnr_timer          = attr->min_rnr_timer;
2211         resp.port_num               = attr->port_num;
2212         resp.timeout                = attr->timeout;
2213         resp.retry_cnt              = attr->retry_cnt;
2214         resp.rnr_retry              = attr->rnr_retry;
2215         resp.alt_port_num           = attr->alt_port_num;
2216         resp.alt_timeout            = attr->alt_timeout;
2217
2218         memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
2219         resp.dest.flow_label        = attr->ah_attr.grh.flow_label;
2220         resp.dest.sgid_index        = attr->ah_attr.grh.sgid_index;
2221         resp.dest.hop_limit         = attr->ah_attr.grh.hop_limit;
2222         resp.dest.traffic_class     = attr->ah_attr.grh.traffic_class;
2223         resp.dest.dlid              = attr->ah_attr.dlid;
2224         resp.dest.sl                = attr->ah_attr.sl;
2225         resp.dest.src_path_bits     = attr->ah_attr.src_path_bits;
2226         resp.dest.static_rate       = attr->ah_attr.static_rate;
2227         resp.dest.is_global         = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
2228         resp.dest.port_num          = attr->ah_attr.port_num;
2229
2230         memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
2231         resp.alt_dest.flow_label    = attr->alt_ah_attr.grh.flow_label;
2232         resp.alt_dest.sgid_index    = attr->alt_ah_attr.grh.sgid_index;
2233         resp.alt_dest.hop_limit     = attr->alt_ah_attr.grh.hop_limit;
2234         resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
2235         resp.alt_dest.dlid          = attr->alt_ah_attr.dlid;
2236         resp.alt_dest.sl            = attr->alt_ah_attr.sl;
2237         resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
2238         resp.alt_dest.static_rate   = attr->alt_ah_attr.static_rate;
2239         resp.alt_dest.is_global     = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
2240         resp.alt_dest.port_num      = attr->alt_ah_attr.port_num;
2241
2242         resp.max_send_wr            = init_attr->cap.max_send_wr;
2243         resp.max_recv_wr            = init_attr->cap.max_recv_wr;
2244         resp.max_send_sge           = init_attr->cap.max_send_sge;
2245         resp.max_recv_sge           = init_attr->cap.max_recv_sge;
2246         resp.max_inline_data        = init_attr->cap.max_inline_data;
2247         resp.sq_sig_all             = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
2248
2249         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2250                          &resp, sizeof resp))
2251                 ret = -EFAULT;
2252
2253 out:
2254         kfree(attr);
2255         kfree(init_attr);
2256
2257         return ret ? ret : in_len;
2258 }
2259
2260 /* Remove attribute-mask bits that are ignored for the given QP type */
2261 static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
2262 {
2263         switch (qp_type) {
2264         case IB_QPT_XRC_INI:
2265                 return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
2266         case IB_QPT_XRC_TGT:
2267                 return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT |
2268                                 IB_QP_RNR_RETRY);
2269         default:
2270                 return mask;
2271         }
2272 }
2273
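/*
 * MODIFY_QP: build an ib_qp_attr from the command, then modify either the
 * underlying real QP directly (passing any driver-private udata through) or,
 * for an opened/shared QP handle, go through ib_modify_qp().
 */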
2274 ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
2275                             struct ib_device *ib_dev,
2276                             const char __user *buf, int in_len,
2277                             int out_len)
2278 {
2279         struct ib_uverbs_modify_qp cmd;
2280         struct ib_udata            udata;
2281         struct ib_qp              *qp;
2282         struct ib_qp_attr         *attr;
2283         int                        ret;
2284
2285         if (copy_from_user(&cmd, buf, sizeof cmd))
2286                 return -EFAULT;
2287
2288         INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
2289                    out_len);
2290
2291         attr = kmalloc(sizeof *attr, GFP_KERNEL);
2292         if (!attr)
2293                 return -ENOMEM;
2294
2295         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2296         if (!qp) {
2297                 ret = -EINVAL;
2298                 goto out;
2299         }
2300
2301         attr->qp_state            = cmd.qp_state;
2302         attr->cur_qp_state        = cmd.cur_qp_state;
2303         attr->path_mtu            = cmd.path_mtu;
2304         attr->path_mig_state      = cmd.path_mig_state;
2305         attr->qkey                = cmd.qkey;
2306         attr->rq_psn              = cmd.rq_psn;
2307         attr->sq_psn              = cmd.sq_psn;
2308         attr->dest_qp_num         = cmd.dest_qp_num;
2309         attr->qp_access_flags     = cmd.qp_access_flags;
2310         attr->pkey_index          = cmd.pkey_index;
2311         attr->alt_pkey_index      = cmd.alt_pkey_index;
2312         attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
2313         attr->max_rd_atomic       = cmd.max_rd_atomic;
2314         attr->max_dest_rd_atomic  = cmd.max_dest_rd_atomic;
2315         attr->min_rnr_timer       = cmd.min_rnr_timer;
2316         attr->port_num            = cmd.port_num;
2317         attr->timeout             = cmd.timeout;
2318         attr->retry_cnt           = cmd.retry_cnt;
2319         attr->rnr_retry           = cmd.rnr_retry;
2320         attr->alt_port_num        = cmd.alt_port_num;
2321         attr->alt_timeout         = cmd.alt_timeout;
2322
2323         memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
2324         attr->ah_attr.grh.flow_label        = cmd.dest.flow_label;
2325         attr->ah_attr.grh.sgid_index        = cmd.dest.sgid_index;
2326         attr->ah_attr.grh.hop_limit         = cmd.dest.hop_limit;
2327         attr->ah_attr.grh.traffic_class     = cmd.dest.traffic_class;
2328         attr->ah_attr.dlid                  = cmd.dest.dlid;
2329         attr->ah_attr.sl                    = cmd.dest.sl;
2330         attr->ah_attr.src_path_bits         = cmd.dest.src_path_bits;
2331         attr->ah_attr.static_rate           = cmd.dest.static_rate;
2332         attr->ah_attr.ah_flags              = cmd.dest.is_global ? IB_AH_GRH : 0;
2333         attr->ah_attr.port_num              = cmd.dest.port_num;
2334
2335         memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
2336         attr->alt_ah_attr.grh.flow_label    = cmd.alt_dest.flow_label;
2337         attr->alt_ah_attr.grh.sgid_index    = cmd.alt_dest.sgid_index;
2338         attr->alt_ah_attr.grh.hop_limit     = cmd.alt_dest.hop_limit;
2339         attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
2340         attr->alt_ah_attr.dlid              = cmd.alt_dest.dlid;
2341         attr->alt_ah_attr.sl                = cmd.alt_dest.sl;
2342         attr->alt_ah_attr.src_path_bits     = cmd.alt_dest.src_path_bits;
2343         attr->alt_ah_attr.static_rate       = cmd.alt_dest.static_rate;
2344         attr->alt_ah_attr.ah_flags          = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
2345         attr->alt_ah_attr.port_num          = cmd.alt_dest.port_num;
2346
2347         if (qp->real_qp == qp) {
2348                 ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
2349                 if (ret)
2350                         goto release_qp;
2351                 ret = qp->device->modify_qp(qp, attr,
2352                         modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
2353         } else {
2354                 ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
2355         }
2356
2357         if (ret)
2358                 goto release_qp;
2359
2360         ret = in_len;
2361
2362 release_qp:
2363         put_qp_read(qp);
2364
2365 out:
2366         kfree(attr);
2367
2368         return ret;
2369 }
2370
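/*
 * DESTROY_QP: refuse to destroy a QP that still has multicast attachments;
 * otherwise destroy it, drop the idr entry and per-file listing, and report
 * how many async events were delivered on it.
 */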
2371 ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
2372                              struct ib_device *ib_dev,
2373                              const char __user *buf, int in_len,
2374                              int out_len)
2375 {
2376         struct ib_uverbs_destroy_qp      cmd;
2377         struct ib_uverbs_destroy_qp_resp resp;
2378         struct ib_uobject               *uobj;
2379         struct ib_qp                    *qp;
2380         struct ib_uqp_object            *obj;
2381         int                              ret = -EINVAL;
2382
2383         if (copy_from_user(&cmd, buf, sizeof cmd))
2384                 return -EFAULT;
2385
2386         memset(&resp, 0, sizeof resp);
2387
2388         uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
2389         if (!uobj)
2390                 return -EINVAL;
2391         qp  = uobj->object;
2392         obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
2393
2394         if (!list_empty(&obj->mcast_list)) {
2395                 put_uobj_write(uobj);
2396                 return -EBUSY;
2397         }
2398
2399         ret = ib_destroy_qp(qp);
2400         if (!ret)
2401                 uobj->live = 0;
2402
2403         put_uobj_write(uobj);
2404
2405         if (ret)
2406                 return ret;
2407
2408         if (obj->uxrcd)
2409                 atomic_dec(&obj->uxrcd->refcnt);
2410
2411         idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
2412
2413         mutex_lock(&file->mutex);
2414         list_del(&uobj->list);
2415         mutex_unlock(&file->mutex);
2416
2417         ib_uverbs_release_uevent(file, &obj->uevent);
2418
2419         resp.events_reported = obj->uevent.events_reported;
2420
2421         put_uobj(uobj);
2422
2423         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2424                          &resp, sizeof resp))
2425                 return -EFAULT;
2426
2427         return in_len;
2428 }
2429
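/*
 * Allocate a work request with room for its scatter/gather array appended,
 * aligning the SGE list to sizeof(struct ib_sge).
 */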
2430 static void *alloc_wr(size_t wr_size, __u32 num_sge)
2431 {
2432         return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
2433                          num_sge * sizeof (struct ib_sge), GFP_KERNEL);
2434 }
2435
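/*
 * POST_SEND: unmarshal the user's work requests one by one, building the
 * opcode-specific kernel WR (UD, RDMA, atomic or plain send) plus its SGE
 * list, then hand the whole chain to the driver's post_send.  On failure,
 * resp.bad_wr reports how far into the chain the bad WR was.
 */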
2436 ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2437                             struct ib_device *ib_dev,
2438                             const char __user *buf, int in_len,
2439                             int out_len)
2440 {
2441         struct ib_uverbs_post_send      cmd;
2442         struct ib_uverbs_post_send_resp resp;
2443         struct ib_uverbs_send_wr       *user_wr;
2444         struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
2445         struct ib_qp                   *qp;
2446         int                             i, sg_ind;
2447         int                             is_ud;
2448         ssize_t                         ret = -EINVAL;
2449
2450         if (copy_from_user(&cmd, buf, sizeof cmd))
2451                 return -EFAULT;
2452
2453         if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
2454             cmd.sge_count * sizeof (struct ib_uverbs_sge))
2455                 return -EINVAL;
2456
2457         if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
2458                 return -EINVAL;
2459
2460         user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
2461         if (!user_wr)
2462                 return -ENOMEM;
2463
2464         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2465         if (!qp)
2466                 goto out;
2467
2468         is_ud = qp->qp_type == IB_QPT_UD;
2469         sg_ind = 0;
2470         last = NULL;
2471         for (i = 0; i < cmd.wr_count; ++i) {
2472                 if (copy_from_user(user_wr,
2473                                    buf + sizeof cmd + i * cmd.wqe_size,
2474                                    cmd.wqe_size)) {
2475                         ret = -EFAULT;
2476                         goto out_put;
2477                 }
2478
2479                 if (user_wr->num_sge + sg_ind > cmd.sge_count) {
2480                         ret = -EINVAL;
2481                         goto out_put;
2482                 }
2483
2484                 if (is_ud) {
2485                         struct ib_ud_wr *ud;
2486
2487                         if (user_wr->opcode != IB_WR_SEND &&
2488                             user_wr->opcode != IB_WR_SEND_WITH_IMM) {
2489                                 ret = -EINVAL;
2490                                 goto out_put;
2491                         }
2492
2493                         ud = alloc_wr(sizeof(*ud), user_wr->num_sge);
2494                         if (!ud) {
2495                                 ret = -ENOMEM;
2496                                 goto out_put;
2497                         }
2498
2499                         ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
2500                         if (!ud->ah) {
2501                                 kfree(ud);
2502                                 ret = -EINVAL;
2503                                 goto out_put;
2504                         }
2505                         ud->remote_qpn = user_wr->wr.ud.remote_qpn;
2506                         ud->remote_qkey = user_wr->wr.ud.remote_qkey;
2507
2508                         next = &ud->wr;
2509                 } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2510                            user_wr->opcode == IB_WR_RDMA_WRITE ||
2511                            user_wr->opcode == IB_WR_RDMA_READ) {
2512                         struct ib_rdma_wr *rdma;
2513
2514                         rdma = alloc_wr(sizeof(*rdma), user_wr->num_sge);
2515                         if (!rdma) {
2516                                 ret = -ENOMEM;
2517                                 goto out_put;
2518                         }
2519
2520                         rdma->remote_addr = user_wr->wr.rdma.remote_addr;
2521                         rdma->rkey = user_wr->wr.rdma.rkey;
2522
2523                         next = &rdma->wr;
2524                 } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
2525                            user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2526                         struct ib_atomic_wr *atomic;
2527
2528                         atomic = alloc_wr(sizeof(*atomic), user_wr->num_sge);
2529                         if (!atomic) {
2530                                 ret = -ENOMEM;
2531                                 goto out_put;
2532                         }
2533
2534                         atomic->remote_addr = user_wr->wr.atomic.remote_addr;
2535                         atomic->compare_add = user_wr->wr.atomic.compare_add;
2536                         atomic->swap = user_wr->wr.atomic.swap;
2537                         atomic->rkey = user_wr->wr.atomic.rkey;
2538
2539                         next = &atomic->wr;
2540                 } else if (user_wr->opcode == IB_WR_SEND ||
2541                            user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2542                            user_wr->opcode == IB_WR_SEND_WITH_INV) {
2543                         next = alloc_wr(sizeof(*next), user_wr->num_sge);
2544                         if (!next) {
2545                                 ret = -ENOMEM;
2546                                 goto out_put;
2547                         }
2548                 } else {
2549                         ret = -EINVAL;
2550                         goto out_put;
2551                 }
2552
2553                 if (user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2554                     user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
2555                         next->ex.imm_data =
2556                                         (__be32 __force) user_wr->ex.imm_data;
2557                 } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) {
2558                         next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey;
2559                 }
2560
2561                 if (!last)
2562                         wr = next;
2563                 else
2564                         last->next = next;
2565                 last = next;
2566
2567                 next->next       = NULL;
2568                 next->wr_id      = user_wr->wr_id;
2569                 next->num_sge    = user_wr->num_sge;
2570                 next->opcode     = user_wr->opcode;
2571                 next->send_flags = user_wr->send_flags;
2572
2573                 if (next->num_sge) {
2574                         next->sg_list = (void *) next +
2575                                 ALIGN(sizeof *next, sizeof (struct ib_sge));
2576                         if (copy_from_user(next->sg_list,
2577                                            buf + sizeof cmd +
2578                                            cmd.wr_count * cmd.wqe_size +
2579                                            sg_ind * sizeof (struct ib_sge),
2580                                            next->num_sge * sizeof (struct ib_sge))) {
2581                                 ret = -EFAULT;
2582                                 goto out_put;
2583                         }
2584                         sg_ind += next->num_sge;
2585                 } else
2586                         next->sg_list = NULL;
2587         }
2588
2589         resp.bad_wr = 0;
2590         ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);
2591         if (ret)
2592                 for (next = wr; next; next = next->next) {
2593                         ++resp.bad_wr;
2594                         if (next == bad_wr)
2595                                 break;
2596                 }
2597
2598         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2599                          &resp, sizeof resp))
2600                 ret = -EFAULT;
2601
2602 out_put:
2603         put_qp_read(qp);
2604
2605         while (wr) {
2606                 if (is_ud && ud_wr(wr)->ah)
2607                         put_ah_read(ud_wr(wr)->ah);
2608                 next = wr->next;
2609                 kfree(wr);
2610                 wr = next;
2611         }
2612
2613 out:
2614         kfree(user_wr);
2615
2616         return ret ? ret : in_len;
2617 }
2618
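/*
 * Build a kernel chain of struct ib_recv_wr (each with its SGE array
 * appended) from the user-space receive work requests.  Returns the head of
 * the chain or an ERR_PTR; the caller frees the chain.
 */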
2619 static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
2620                                                     int in_len,
2621                                                     u32 wr_count,
2622                                                     u32 sge_count,
2623                                                     u32 wqe_size)
2624 {
2625         struct ib_uverbs_recv_wr *user_wr;
2626         struct ib_recv_wr        *wr = NULL, *last, *next;
2627         int                       sg_ind;
2628         int                       i;
2629         int                       ret;
2630
2631         if (in_len < wqe_size * wr_count +
2632             sge_count * sizeof (struct ib_uverbs_sge))
2633                 return ERR_PTR(-EINVAL);
2634
2635         if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
2636                 return ERR_PTR(-EINVAL);
2637
2638         user_wr = kmalloc(wqe_size, GFP_KERNEL);
2639         if (!user_wr)
2640                 return ERR_PTR(-ENOMEM);
2641
2642         sg_ind = 0;
2643         last = NULL;
2644         for (i = 0; i < wr_count; ++i) {
2645                 if (copy_from_user(user_wr, buf + i * wqe_size,
2646                                    wqe_size)) {
2647                         ret = -EFAULT;
2648                         goto err;
2649                 }
2650
2651                 if (user_wr->num_sge + sg_ind > sge_count) {
2652                         ret = -EINVAL;
2653                         goto err;
2654                 }
2655
2656                 next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
2657                                user_wr->num_sge * sizeof (struct ib_sge),
2658                                GFP_KERNEL);
2659                 if (!next) {
2660                         ret = -ENOMEM;
2661                         goto err;
2662                 }
2663
2664                 if (!last)
2665                         wr = next;
2666                 else
2667                         last->next = next;
2668                 last = next;
2669
2670                 next->next       = NULL;
2671                 next->wr_id      = user_wr->wr_id;
2672                 next->num_sge    = user_wr->num_sge;
2673
2674                 if (next->num_sge) {
2675                         next->sg_list = (void *) next +
2676                                 ALIGN(sizeof *next, sizeof (struct ib_sge));
2677                         if (copy_from_user(next->sg_list,
2678                                            buf + wr_count * wqe_size +
2679                                            sg_ind * sizeof (struct ib_sge),
2680                                            next->num_sge * sizeof (struct ib_sge))) {
2681                                 ret = -EFAULT;
2682                                 goto err;
2683                         }
2684                         sg_ind += next->num_sge;
2685                 } else
2686                         next->sg_list = NULL;
2687         }
2688
2689         kfree(user_wr);
2690         return wr;
2691
2692 err:
2693         kfree(user_wr);
2694
2695         while (wr) {
2696                 next = wr->next;
2697                 kfree(wr);
2698                 wr = next;
2699         }
2700
2701         return ERR_PTR(ret);
2702 }
2703
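/*
 * POST_RECV: unmarshal the receive work requests and post them to the QP's
 * receive queue; resp.bad_wr reports the failing WR, as for post_send.
 */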
2704 ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
2705                             struct ib_device *ib_dev,
2706                             const char __user *buf, int in_len,
2707                             int out_len)
2708 {
2709         struct ib_uverbs_post_recv      cmd;
2710         struct ib_uverbs_post_recv_resp resp;
2711         struct ib_recv_wr              *wr, *next, *bad_wr;
2712         struct ib_qp                   *qp;
2713         ssize_t                         ret = -EINVAL;
2714
2715         if (copy_from_user(&cmd, buf, sizeof cmd))
2716                 return -EFAULT;
2717
2718         wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
2719                                        in_len - sizeof cmd, cmd.wr_count,
2720                                        cmd.sge_count, cmd.wqe_size);
2721         if (IS_ERR(wr))
2722                 return PTR_ERR(wr);
2723
2724         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2725         if (!qp)
2726                 goto out;
2727
2728         resp.bad_wr = 0;
2729         ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
2730
2731         put_qp_read(qp);
2732
2733         if (ret)
2734                 for (next = wr; next; next = next->next) {
2735                         ++resp.bad_wr;
2736                         if (next == bad_wr)
2737                                 break;
2738                 }
2739
2740         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2741                          &resp, sizeof resp))
2742                 ret = -EFAULT;
2743
2744 out:
2745         while (wr) {
2746                 next = wr->next;
2747                 kfree(wr);
2748                 wr = next;
2749         }
2750
2751         return ret ? ret : in_len;
2752 }
2753
2754 ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
2755                                 struct ib_device *ib_dev,
2756                                 const char __user *buf, int in_len,
2757                                 int out_len)
2758 {
2759         struct ib_uverbs_post_srq_recv      cmd;
2760         struct ib_uverbs_post_srq_recv_resp resp;
2761         struct ib_recv_wr                  *wr, *next, *bad_wr;
2762         struct ib_srq                      *srq;
2763         ssize_t                             ret = -EINVAL;
2764
2765         if (copy_from_user(&cmd, buf, sizeof cmd))
2766                 return -EFAULT;
2767
2768         wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
2769                                        in_len - sizeof cmd, cmd.wr_count,
2770                                        cmd.sge_count, cmd.wqe_size);
2771         if (IS_ERR(wr))
2772                 return PTR_ERR(wr);
2773
2774         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
2775         if (!srq)
2776                 goto out;
2777
2778         resp.bad_wr = 0;
2779         ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
2780
2781         put_srq_read(srq);
2782
2783         if (ret)
2784                 for (next = wr; next; next = next->next) {
2785                         ++resp.bad_wr;
2786                         if (next == bad_wr)
2787                                 break;
2788                 }
2789
2790         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2791                          &resp, sizeof resp))
2792                 ret = -EFAULT;
2793
2794 out:
2795         while (wr) {
2796                 next = wr->next;
2797                 kfree(wr);
2798                 wr = next;
2799         }
2800
2801         return ret ? ret : in_len;
2802 }
2803
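/*
 * CREATE_AH: build an ib_ah_attr from the command and create an address
 * handle on the given PD, returning its handle to user space.
 */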
2804 ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
2805                             struct ib_device *ib_dev,
2806                             const char __user *buf, int in_len,
2807                             int out_len)
2808 {
2809         struct ib_uverbs_create_ah       cmd;
2810         struct ib_uverbs_create_ah_resp  resp;
2811         struct ib_uobject               *uobj;
2812         struct ib_pd                    *pd;
2813         struct ib_ah                    *ah;
2814         struct ib_ah_attr               attr;
2815         int ret;
2816
2817         if (out_len < sizeof resp)
2818                 return -ENOSPC;
2819
2820         if (copy_from_user(&cmd, buf, sizeof cmd))
2821                 return -EFAULT;
2822
2823         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
2824         if (!uobj)
2825                 return -ENOMEM;
2826
2827         init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
2828         down_write(&uobj->mutex);
2829
2830         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
2831         if (!pd) {
2832                 ret = -EINVAL;
2833                 goto err;
2834         }
2835
2836         attr.dlid              = cmd.attr.dlid;
2837         attr.sl                = cmd.attr.sl;
2838         attr.src_path_bits     = cmd.attr.src_path_bits;
2839         attr.static_rate       = cmd.attr.static_rate;
2840         attr.ah_flags          = cmd.attr.is_global ? IB_AH_GRH : 0;
2841         attr.port_num          = cmd.attr.port_num;
2842         attr.grh.flow_label    = cmd.attr.grh.flow_label;
2843         attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
2844         attr.grh.hop_limit     = cmd.attr.grh.hop_limit;
2845         attr.grh.traffic_class = cmd.attr.grh.traffic_class;
2846         memset(&attr.dmac, 0, sizeof(attr.dmac));
2847         memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
2848
2849         ah = ib_create_ah(pd, &attr);
2850         if (IS_ERR(ah)) {
2851                 ret = PTR_ERR(ah);
2852                 goto err_put;
2853         }
2854
2855         ah->uobject  = uobj;
2856         uobj->object = ah;
2857
2858         ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
2859         if (ret)
2860                 goto err_destroy;
2861
2862         resp.ah_handle = uobj->id;
2863
2864         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2865                          &resp, sizeof resp)) {
2866                 ret = -EFAULT;
2867                 goto err_copy;
2868         }
2869
2870         put_pd_read(pd);
2871
2872         mutex_lock(&file->mutex);
2873         list_add_tail(&uobj->list, &file->ucontext->ah_list);
2874         mutex_unlock(&file->mutex);
2875
2876         uobj->live = 1;
2877
2878         up_write(&uobj->mutex);
2879
2880         return in_len;
2881
2882 err_copy:
2883         idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
2884
2885 err_destroy:
2886         ib_destroy_ah(ah);
2887
2888 err_put:
2889         put_pd_read(pd);
2890
2891 err:
2892         put_uobj_write(uobj);
2893         return ret;
2894 }
2895
2896 ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
2897                              struct ib_device *ib_dev,
2898                              const char __user *buf, int in_len, int out_len)
2899 {
2900         struct ib_uverbs_destroy_ah cmd;
2901         struct ib_ah               *ah;
2902         struct ib_uobject          *uobj;
2903         int                         ret;
2904
2905         if (copy_from_user(&cmd, buf, sizeof cmd))
2906                 return -EFAULT;
2907
2908         uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
2909         if (!uobj)
2910                 return -EINVAL;
2911         ah = uobj->object;
2912
2913         ret = ib_destroy_ah(ah);
2914         if (!ret)
2915                 uobj->live = 0;
2916
2917         put_uobj_write(uobj);
2918
2919         if (ret)
2920                 return ret;
2921
2922         idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
2923
2924         mutex_lock(&file->mutex);
2925         list_del(&uobj->list);
2926         mutex_unlock(&file->mutex);
2927
2928         put_uobj(uobj);
2929
2930         return in_len;
2931 }
2932
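/*
 * ATTACH_MCAST: attach the QP to a multicast group, remembering the
 * attachment on the uobject's mcast_list so it can be released when the QP
 * is destroyed; attaching the same <gid, lid> pair twice is a no-op.
 */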
2933 ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
2934                                struct ib_device *ib_dev,
2935                                const char __user *buf, int in_len,
2936                                int out_len)
2937 {
2938         struct ib_uverbs_attach_mcast cmd;
2939         struct ib_qp                 *qp;
2940         struct ib_uqp_object         *obj;
2941         struct ib_uverbs_mcast_entry *mcast;
2942         int                           ret;
2943
2944         if (copy_from_user(&cmd, buf, sizeof cmd))
2945                 return -EFAULT;
2946
2947         qp = idr_write_qp(cmd.qp_handle, file->ucontext);
2948         if (!qp)
2949                 return -EINVAL;
2950
2951         obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
2952
2953         list_for_each_entry(mcast, &obj->mcast_list, list)
2954                 if (cmd.mlid == mcast->lid &&
2955                     !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
2956                         ret = 0;
2957                         goto out_put;
2958                 }
2959
2960         mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
2961         if (!mcast) {
2962                 ret = -ENOMEM;
2963                 goto out_put;
2964         }
2965
2966         mcast->lid = cmd.mlid;
2967         memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw);
2968
2969         ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid);
2970         if (!ret)
2971                 list_add_tail(&mcast->list, &obj->mcast_list);
2972         else
2973                 kfree(mcast);
2974
2975 out_put:
2976         put_qp_write(qp);
2977
2978         return ret ? ret : in_len;
2979 }
2980
2981 ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
2982                                struct ib_device *ib_dev,
2983                                const char __user *buf, int in_len,
2984                                int out_len)
2985 {
2986         struct ib_uverbs_detach_mcast cmd;
2987         struct ib_uqp_object         *obj;
2988         struct ib_qp                 *qp;
2989         struct ib_uverbs_mcast_entry *mcast;
2990         int                           ret = -EINVAL;
2991
2992         if (copy_from_user(&cmd, buf, sizeof cmd))
2993                 return -EFAULT;
2994
2995         qp = idr_write_qp(cmd.qp_handle, file->ucontext);
2996         if (!qp)
2997                 return -EINVAL;
2998
2999         ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
3000         if (ret)
3001                 goto out_put;
3002
3003         obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
3004
3005         list_for_each_entry(mcast, &obj->mcast_list, list)
3006                 if (cmd.mlid == mcast->lid &&
3007                     !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
3008                         list_del(&mcast->list);
3009                         kfree(mcast);
3010                         break;
3011                 }
3012
3013 out_put:
3014         put_qp_write(qp);
3015
3016         return ret ? ret : in_len;
3017 }
3018
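/*
 * Convert a user-space flow spec into the kernel union ib_flow_spec,
 * checking that the reserved field is clear and that the size matches the
 * kernel's idea of that spec type.
 */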
3019 static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
3020                                 union ib_flow_spec *ib_spec)
3021 {
3022         if (kern_spec->reserved)
3023                 return -EINVAL;
3024
3025         ib_spec->type = kern_spec->type;
3026
3027         switch (ib_spec->type) {
3028         case IB_FLOW_SPEC_ETH:
3029                 ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
3030                 if (ib_spec->eth.size != kern_spec->eth.size)
3031                         return -EINVAL;
3032                 memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
3033                        sizeof(struct ib_flow_eth_filter));
3034                 memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
3035                        sizeof(struct ib_flow_eth_filter));
3036                 break;
3037         case IB_FLOW_SPEC_IPV4:
3038                 ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
3039                 if (ib_spec->ipv4.size != kern_spec->ipv4.size)
3040                         return -EINVAL;
3041                 memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
3042                        sizeof(struct ib_flow_ipv4_filter));
3043                 memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
3044                        sizeof(struct ib_flow_ipv4_filter));
3045                 break;
3046         case IB_FLOW_SPEC_TCP:
3047         case IB_FLOW_SPEC_UDP:
3048                 ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
3049                 if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size)
3050                         return -EINVAL;
3051                 memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
3052                        sizeof(struct ib_flow_tcp_udp_filter));
3053                 memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
3054                        sizeof(struct ib_flow_tcp_udp_filter));
3055                 break;
3056         default:
3057                 return -EINVAL;
3058         }
3059         return 0;
3060 }
3061
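/*
 * Extended command: create a flow steering rule on a QP.  The fixed part of
 * the command is followed by a variable-length array of flow specs, which is
 * copied in, validated and converted before being handed to the driver
 * through ib_create_flow().
 */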
3062 int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3063                              struct ib_device *ib_dev,
3064                              struct ib_udata *ucore,
3065                              struct ib_udata *uhw)
3066 {
3067         struct ib_uverbs_create_flow      cmd;
3068         struct ib_uverbs_create_flow_resp resp;
3069         struct ib_uobject                 *uobj;
3070         struct ib_flow                    *flow_id;
3071         struct ib_uverbs_flow_attr        *kern_flow_attr;
3072         struct ib_flow_attr               *flow_attr;
3073         struct ib_qp                      *qp;
3074         int err = 0;
3075         void *kern_spec;
3076         void *ib_spec;
3077         int i;
3078
3079         if (ucore->inlen < sizeof(cmd))
3080                 return -EINVAL;
3081
3082         if (ucore->outlen < sizeof(resp))
3083                 return -ENOSPC;
3084
3085         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
3086         if (err)
3087                 return err;
3088
3089         ucore->inbuf += sizeof(cmd);
3090         ucore->inlen -= sizeof(cmd);
3091
3092         if (cmd.comp_mask)
3093                 return -EINVAL;
3094
3095         if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER &&
3096              !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
3097                 return -EPERM;
3098
3099         if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
3100                 return -EINVAL;
3101
3102         if (cmd.flow_attr.size > ucore->inlen ||
3103             cmd.flow_attr.size >
3104             (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
3105                 return -EINVAL;
3106
3107         if (cmd.flow_attr.reserved[0] ||
3108             cmd.flow_attr.reserved[1])
3109                 return -EINVAL;
3110
3111         if (cmd.flow_attr.num_of_specs) {
3112                 kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
3113                                          GFP_KERNEL);
3114                 if (!kern_flow_attr)
3115                         return -ENOMEM;
3116
3117                 memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
3118                 err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
3119                                          cmd.flow_attr.size);
3120                 if (err)
3121                         goto err_free_attr;
3122         } else {
3123                 kern_flow_attr = &cmd.flow_attr;
3124         }
3125
3126         uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
3127         if (!uobj) {
3128                 err = -ENOMEM;
3129                 goto err_free_attr;
3130         }
3131         init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
3132         down_write(&uobj->mutex);
3133
3134         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
3135         if (!qp) {
3136                 err = -EINVAL;
3137                 goto err_uobj;
3138         }
3139
3140         flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
3141         if (!flow_attr) {
3142                 err = -ENOMEM;
3143                 goto err_put;
3144         }
3145
3146         flow_attr->type = kern_flow_attr->type;
3147         flow_attr->priority = kern_flow_attr->priority;
3148         flow_attr->num_of_specs = kern_flow_attr->num_of_specs;
3149         flow_attr->port = kern_flow_attr->port;
3150         flow_attr->flags = kern_flow_attr->flags;
3151         flow_attr->size = sizeof(*flow_attr);
3152
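        /*
         * Walk the variable-length spec array appended to the command,
         * converting each user spec in place and accumulating the total
         * size of the kernel flow_attr.
         */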
3153         kern_spec = kern_flow_attr + 1;
3154         ib_spec = flow_attr + 1;
3155         for (i = 0; i < flow_attr->num_of_specs &&
3156              cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
3157              cmd.flow_attr.size >=
3158              ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
3159                 err = kern_spec_to_ib_spec(kern_spec, ib_spec);
3160                 if (err)
3161                         goto err_free;
3162                 flow_attr->size +=
3163                         ((union ib_flow_spec *) ib_spec)->size;
3164                 cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
3165                 kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
3166                 ib_spec += ((union ib_flow_spec *) ib_spec)->size;
3167         }
3168         if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
3169                 pr_warn("create flow failed, flow %d: %d bytes left from uverbs cmd\n",
3170                         i, cmd.flow_attr.size);
3171                 err = -EINVAL;
3172                 goto err_free;
3173         }
3174         flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
3175         if (IS_ERR(flow_id)) {
3176                 err = PTR_ERR(flow_id);
3177                 goto err_free;
3178         }
3179         flow_id->qp = qp;
3180         flow_id->uobject = uobj;
3181         uobj->object = flow_id;
3182
3183         err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
3184         if (err)
3185                 goto destroy_flow;
3186
3187         memset(&resp, 0, sizeof(resp));
3188         resp.flow_handle = uobj->id;
3189
3190         err = ib_copy_to_udata(ucore,
3191                                &resp, sizeof(resp));
3192         if (err)
3193                 goto err_copy;
3194
3195         put_qp_read(qp);
3196         mutex_lock(&file->mutex);
3197         list_add_tail(&uobj->list, &file->ucontext->rule_list);
3198         mutex_unlock(&file->mutex);
3199
3200         uobj->live = 1;
3201
3202         up_write(&uobj->mutex);
3203         kfree(flow_attr);
3204         if (cmd.flow_attr.num_of_specs)
3205                 kfree(kern_flow_attr);
3206         return 0;
3207 err_copy:
3208         idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
3209 destroy_flow:
3210         ib_destroy_flow(flow_id);
3211 err_free:
3212         kfree(flow_attr);
3213 err_put:
3214         put_qp_read(qp);
3215 err_uobj:
3216         put_uobj_write(uobj);
3217 err_free_attr:
3218         if (cmd.flow_attr.num_of_specs)
3219                 kfree(kern_flow_attr);
3220         return err;
3221 }
3222
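/*
 * Extended command: destroy a flow steering rule by handle and release its
 * user object.
 */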
3223 int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
3224                               struct ib_device *ib_dev,
3225                               struct ib_udata *ucore,
3226                               struct ib_udata *uhw)
3227 {
3228         struct ib_uverbs_destroy_flow   cmd;
3229         struct ib_flow                  *flow_id;
3230         struct ib_uobject               *uobj;
3231         int                             ret;
3232
3233         if (ucore->inlen < sizeof(cmd))
3234                 return -EINVAL;
3235
3236         ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
3237         if (ret)
3238                 return ret;
3239
3240         if (cmd.comp_mask)
3241                 return -EINVAL;
3242
3243         uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
3244                               file->ucontext);
3245         if (!uobj)
3246                 return -EINVAL;
3247         flow_id = uobj->object;
3248
3249         ret = ib_destroy_flow(flow_id);
3250         if (!ret)
3251                 uobj->live = 0;
3252
3253         put_uobj_write(uobj);
3254
3255         idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
3256
3257         mutex_lock(&file->mutex);
3258         list_del(&uobj->list);
3259         mutex_unlock(&file->mutex);
3260
3261         put_uobj(uobj);
3262
3263         return ret;
3264 }
3265
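/*
 * Common implementation behind the create_srq and create_xsrq commands.
 * For XRC SRQs the caller also supplies an XRCD and a CQ; references on
 * those objects are held while the SRQ is being set up.
 */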
3266 static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
3267                                 struct ib_device *ib_dev,
3268                                 struct ib_uverbs_create_xsrq *cmd,
3269                                 struct ib_udata *udata)
3270 {
3271         struct ib_uverbs_create_srq_resp resp;
3272         struct ib_usrq_object           *obj;
3273         struct ib_pd                    *pd;
3274         struct ib_srq                   *srq;
3275         struct ib_uobject               *uninitialized_var(xrcd_uobj);
3276         struct ib_srq_init_attr          attr;
3277         int ret;
3278
3279         obj = kmalloc(sizeof *obj, GFP_KERNEL);
3280         if (!obj)
3281                 return -ENOMEM;
3282
3283         init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
3284         down_write(&obj->uevent.uobject.mutex);
3285
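        /*
         * XRC SRQs need an XRCD and a CQ.  Look both up and take a
         * reference on the XRCD's user object so it cannot be
         * deallocated while this SRQ exists.
         */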
3286         if (cmd->srq_type == IB_SRQT_XRC) {
3287                 attr.ext.xrc.xrcd  = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
3288                 if (!attr.ext.xrc.xrcd) {
3289                         ret = -EINVAL;
3290                         goto err;
3291                 }
3292
3293                 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
3294                 atomic_inc(&obj->uxrcd->refcnt);
3295
3296                 attr.ext.xrc.cq  = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
3297                 if (!attr.ext.xrc.cq) {
3298                         ret = -EINVAL;
3299                         goto err_put_xrcd;
3300                 }
3301         }
3302
3303         pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
3304         if (!pd) {
3305                 ret = -EINVAL;
3306                 goto err_put_cq;
3307         }
3308
3309         attr.event_handler  = ib_uverbs_srq_event_handler;
3310         attr.srq_context    = file;
3311         attr.srq_type       = cmd->srq_type;
3312         attr.attr.max_wr    = cmd->max_wr;
3313         attr.attr.max_sge   = cmd->max_sge;
3314         attr.attr.srq_limit = cmd->srq_limit;
3315
3316         obj->uevent.events_reported = 0;
3317         INIT_LIST_HEAD(&obj->uevent.event_list);
3318
3319         srq = pd->device->create_srq(pd, &attr, udata);
3320         if (IS_ERR(srq)) {
3321                 ret = PTR_ERR(srq);
3322                 goto err_put;
3323         }
3324
3325         srq->device        = pd->device;
3326         srq->pd            = pd;
3327         srq->srq_type      = cmd->srq_type;
3328         srq->uobject       = &obj->uevent.uobject;
3329         srq->event_handler = attr.event_handler;
3330         srq->srq_context   = attr.srq_context;
3331
3332         if (cmd->srq_type == IB_SRQT_XRC) {
3333                 srq->ext.xrc.cq   = attr.ext.xrc.cq;
3334                 srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
3335                 atomic_inc(&attr.ext.xrc.cq->usecnt);
3336                 atomic_inc(&attr.ext.xrc.xrcd->usecnt);
3337         }
3338
3339         atomic_inc(&pd->usecnt);
3340         atomic_set(&srq->usecnt, 0);
3341
3342         obj->uevent.uobject.object = srq;
3343         ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
3344         if (ret)
3345                 goto err_destroy;
3346
3347         memset(&resp, 0, sizeof resp);
3348         resp.srq_handle = obj->uevent.uobject.id;
3349         resp.max_wr     = attr.attr.max_wr;
3350         resp.max_sge    = attr.attr.max_sge;
3351         if (cmd->srq_type == IB_SRQT_XRC)
3352                 resp.srqn = srq->ext.xrc.srq_num;
3353
3354         if (copy_to_user((void __user *) (unsigned long) cmd->response,
3355                          &resp, sizeof resp)) {
3356                 ret = -EFAULT;
3357                 goto err_copy;
3358         }
3359
3360         if (cmd->srq_type == IB_SRQT_XRC) {
3361                 put_uobj_read(xrcd_uobj);
3362                 put_cq_read(attr.ext.xrc.cq);
3363         }
3364         put_pd_read(pd);
3365
3366         mutex_lock(&file->mutex);
3367         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
3368         mutex_unlock(&file->mutex);
3369
3370         obj->uevent.uobject.live = 1;
3371
3372         up_write(&obj->uevent.uobject.mutex);
3373
3374         return 0;
3375
3376 err_copy:
3377         idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
3378
3379 err_destroy:
3380         ib_destroy_srq(srq);
3381
3382 err_put:
3383         put_pd_read(pd);
3384
3385 err_put_cq:
3386         if (cmd->srq_type == IB_SRQT_XRC)
3387                 put_cq_read(attr.ext.xrc.cq);
3388
3389 err_put_xrcd:
3390         if (cmd->srq_type == IB_SRQT_XRC) {
3391                 atomic_dec(&obj->uxrcd->refcnt);
3392                 put_uobj_read(xrcd_uobj);
3393         }
3394
3395 err:
3396         put_uobj_write(&obj->uevent.uobject);
3397         return ret;
3398 }
3399
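/*
 * Legacy create_srq command: translate the request into a create_xsrq
 * command of type IB_SRQT_BASIC and reuse the common creation path.
 */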
3400 ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
3401                              struct ib_device *ib_dev,
3402                              const char __user *buf, int in_len,
3403                              int out_len)
3404 {
3405         struct ib_uverbs_create_srq      cmd;
3406         struct ib_uverbs_create_xsrq     xcmd;
3407         struct ib_uverbs_create_srq_resp resp;
3408         struct ib_udata                  udata;
3409         int ret;
3410
3411         if (out_len < sizeof resp)
3412                 return -ENOSPC;
3413
3414         if (copy_from_user(&cmd, buf, sizeof cmd))
3415                 return -EFAULT;
3416
3417         xcmd.response    = cmd.response;
3418         xcmd.user_handle = cmd.user_handle;
3419         xcmd.srq_type    = IB_SRQT_BASIC;
3420         xcmd.pd_handle   = cmd.pd_handle;
3421         xcmd.max_wr      = cmd.max_wr;
3422         xcmd.max_sge     = cmd.max_sge;
3423         xcmd.srq_limit   = cmd.srq_limit;
3424
3425         INIT_UDATA(&udata, buf + sizeof cmd,
3426                    (unsigned long) cmd.response + sizeof resp,
3427                    in_len - sizeof cmd, out_len - sizeof resp);
3428
3429         ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
3430         if (ret)
3431                 return ret;
3432
3433         return in_len;
3434 }
3435
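/* Extended SRQ creation command (supports XRC SRQs). */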
3436 ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
3437                               struct ib_device *ib_dev,
3438                               const char __user *buf, int in_len, int out_len)
3439 {
3440         struct ib_uverbs_create_xsrq     cmd;
3441         struct ib_uverbs_create_srq_resp resp;
3442         struct ib_udata                  udata;
3443         int ret;
3444
3445         if (out_len < sizeof resp)
3446                 return -ENOSPC;
3447
3448         if (copy_from_user(&cmd, buf, sizeof cmd))
3449                 return -EFAULT;
3450
3451         INIT_UDATA(&udata, buf + sizeof cmd,
3452                    (unsigned long) cmd.response + sizeof resp,
3453                    in_len - sizeof cmd, out_len - sizeof resp);
3454
3455         ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
3456         if (ret)
3457                 return ret;
3458
3459         return in_len;
3460 }
3461
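/* Modify an SRQ's max_wr / srq_limit attributes according to attr_mask. */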
3462 ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
3463                              struct ib_device *ib_dev,
3464                              const char __user *buf, int in_len,
3465                              int out_len)
3466 {
3467         struct ib_uverbs_modify_srq cmd;
3468         struct ib_udata             udata;
3469         struct ib_srq              *srq;
3470         struct ib_srq_attr          attr;
3471         int                         ret;
3472
3473         if (copy_from_user(&cmd, buf, sizeof cmd))
3474                 return -EFAULT;
3475
3476         INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
3477                    out_len);
3478
3479         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
3480         if (!srq)
3481                 return -EINVAL;
3482
3483         attr.max_wr    = cmd.max_wr;
3484         attr.srq_limit = cmd.srq_limit;
3485
3486         ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
3487
3488         put_srq_read(srq);
3489
3490         return ret ? ret : in_len;
3491 }
3492
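/* Query an SRQ's current attributes and copy them back to user space. */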
3493 ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
3494                             struct ib_device *ib_dev,
3495                             const char __user *buf,
3496                             int in_len, int out_len)
3497 {
3498         struct ib_uverbs_query_srq      cmd;
3499         struct ib_uverbs_query_srq_resp resp;
3500         struct ib_srq_attr              attr;
3501         struct ib_srq                   *srq;
3502         int                             ret;
3503
3504         if (out_len < sizeof resp)
3505                 return -ENOSPC;
3506
3507         if (copy_from_user(&cmd, buf, sizeof cmd))
3508                 return -EFAULT;
3509
3510         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
3511         if (!srq)
3512                 return -EINVAL;
3513
3514         ret = ib_query_srq(srq, &attr);
3515
3516         put_srq_read(srq);
3517
3518         if (ret)
3519                 return ret;
3520
3521         memset(&resp, 0, sizeof resp);
3522
3523         resp.max_wr    = attr.max_wr;
3524         resp.max_sge   = attr.max_sge;
3525         resp.srq_limit = attr.srq_limit;
3526
3527         if (copy_to_user((void __user *) (unsigned long) cmd.response,
3528                          &resp, sizeof resp))
3529                 return -EFAULT;
3530
3531         return in_len;
3532 }
3533
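/*
 * Destroy an SRQ.  The count of asynchronous events already reported for
 * the SRQ is returned in the response.
 */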
3534 ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
3535                               struct ib_device *ib_dev,
3536                               const char __user *buf, int in_len,
3537                               int out_len)
3538 {
3539         struct ib_uverbs_destroy_srq      cmd;
3540         struct ib_uverbs_destroy_srq_resp resp;
3541         struct ib_uobject                *uobj;
3542         struct ib_srq                    *srq;
3543         struct ib_uevent_object          *obj;
3544         int                               ret = -EINVAL;
3545         struct ib_usrq_object            *us;
3546         enum ib_srq_type                  srq_type;
3547
3548         if (copy_from_user(&cmd, buf, sizeof cmd))
3549                 return -EFAULT;
3550
3551         uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
3552         if (!uobj)
3553                 return -EINVAL;
3554         srq = uobj->object;
3555         obj = container_of(uobj, struct ib_uevent_object, uobject);
3556         srq_type = srq->srq_type;
3557
3558         ret = ib_destroy_srq(srq);
3559         if (!ret)
3560                 uobj->live = 0;
3561
3562         put_uobj_write(uobj);
3563
3564         if (ret)
3565                 return ret;
3566
3567         if (srq_type == IB_SRQT_XRC) {
3568                 us = container_of(obj, struct ib_usrq_object, uevent);
3569                 atomic_dec(&us->uxrcd->refcnt);
3570         }
3571
3572         idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
3573
3574         mutex_lock(&file->mutex);
3575         list_del(&uobj->list);
3576         mutex_unlock(&file->mutex);
3577
3578         ib_uverbs_release_uevent(file, obj);
3579
3580         memset(&resp, 0, sizeof resp);
3581         resp.events_reported = obj->events_reported;
3582
3583         put_uobj(uobj);
3584
3585         if (copy_to_user((void __user *) (unsigned long) cmd.response,
3586                          &resp, sizeof resp))
3587                 ret = -EFAULT;
3588
3589         return ret ? ret : in_len;
3590 }
3591
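/*
 * Extended query_device command.  The response is extensible: each optional
 * field is filled in only if the user's output buffer is large enough, and
 * resp.response_length tells user space how much was actually written.
 */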
3592 int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
3593                               struct ib_device *ib_dev,
3594                               struct ib_udata *ucore,
3595                               struct ib_udata *uhw)
3596 {
3597         struct ib_uverbs_ex_query_device_resp resp;
3598         struct ib_uverbs_ex_query_device  cmd;
3599         struct ib_device_attr attr;
3600         int err;
3601
3602         if (ucore->inlen < sizeof(cmd))
3603                 return -EINVAL;
3604
3605         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
3606         if (err)
3607                 return err;
3608
3609         if (cmd.comp_mask)
3610                 return -EINVAL;
3611
3612         if (cmd.reserved)
3613                 return -EINVAL;
3614
3615         resp.response_length = offsetof(typeof(resp), odp_caps);
3616
3617         if (ucore->outlen < resp.response_length)
3618                 return -ENOSPC;
3619
3620         memset(&attr, 0, sizeof(attr));
3621
3622         err = ib_dev->query_device(ib_dev, &attr, uhw);
3623         if (err)
3624                 return err;
3625
3626         copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
3627         resp.comp_mask = 0;
3628
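        /*
         * Optional response fields: copy each one only if the user's
         * output buffer has room for it, growing response_length as we go.
         */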
3629         if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
3630                 goto end;
3631
3632 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
3633         resp.odp_caps.general_caps = attr.odp_caps.general_caps;
3634         resp.odp_caps.per_transport_caps.rc_odp_caps =
3635                 attr.odp_caps.per_transport_caps.rc_odp_caps;
3636         resp.odp_caps.per_transport_caps.uc_odp_caps =
3637                 attr.odp_caps.per_transport_caps.uc_odp_caps;
3638         resp.odp_caps.per_transport_caps.ud_odp_caps =
3639                 attr.odp_caps.per_transport_caps.ud_odp_caps;
3640         resp.odp_caps.reserved = 0;
3641 #else
3642         memset(&resp.odp_caps, 0, sizeof(resp.odp_caps));
3643 #endif
3644         resp.response_length += sizeof(resp.odp_caps);
3645
3646         if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask))
3647                 goto end;
3648
3649         resp.timestamp_mask = attr.timestamp_mask;
3650         resp.response_length += sizeof(resp.timestamp_mask);
3651
3652         if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock))
3653                 goto end;
3654
3655         resp.hca_core_clock = attr.hca_core_clock;
3656         resp.response_length += sizeof(resp.hca_core_clock);
3657
3658 end:
3659         err = ib_copy_to_udata(ucore, &resp, resp.response_length);
3660         if (err)
3661                 return err;
3662
3663         return 0;
3664 }