Merge branch 'core-objtool-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-block.git] / drivers / infiniband / core / iwcm.c
1 /*
2  * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
3  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
4  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
5  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
6  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
7  * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
8  *
9  * This software is available to you under a choice of one of two
10  * licenses.  You may choose to be licensed under the terms of the GNU
11  * General Public License (GPL) Version 2, available from the file
12  * COPYING in the main directory of this source tree, or the
13  * OpenIB.org BSD license below:
14  *
15  *     Redistribution and use in source and binary forms, with or
16  *     without modification, are permitted provided that the following
17  *     conditions are met:
18  *
19  *      - Redistributions of source code must retain the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer.
22  *
23  *      - Redistributions in binary form must reproduce the above
24  *        copyright notice, this list of conditions and the following
25  *        disclaimer in the documentation and/or other materials
26  *        provided with the distribution.
27  *
28  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35  * SOFTWARE.
36  *
37  */
38 #include <linux/dma-mapping.h>
39 #include <linux/err.h>
40 #include <linux/idr.h>
41 #include <linux/interrupt.h>
42 #include <linux/rbtree.h>
43 #include <linux/sched.h>
44 #include <linux/spinlock.h>
45 #include <linux/workqueue.h>
46 #include <linux/completion.h>
47 #include <linux/slab.h>
48 #include <linux/module.h>
49 #include <linux/sysctl.h>
50
51 #include <rdma/iw_cm.h>
52 #include <rdma/ib_addr.h>
53 #include <rdma/iw_portmap.h>
54 #include <rdma/rdma_netlink.h>
55
56 #include "iwcm.h"
57
58 MODULE_AUTHOR("Tom Tucker");
59 MODULE_DESCRIPTION("iWARP CM");
60 MODULE_LICENSE("Dual BSD/GPL");
61
/* Reject reason strings, indexed by (positive) errno value. */
static const char * const iwcm_rej_reason_strs[] = {
	[ECONNRESET]	= "reset by remote host",
	[ECONNREFUSED]	= "refused by remote application",
	[ETIMEDOUT]	= "setup timeout",
};

/**
 * iwcm_reject_msg - translate a reject reason into a printable string
 * @reason: negative errno value describing the reject reason
 *
 * Returns the matching entry of iwcm_rej_reason_strs[], or
 * "unrecognized reason" when @reason has no entry in that table.
 */
const char *__attribute_const__ iwcm_reject_msg(int reason)
{
	/* iWARP uses negative errnos; flip the sign to get the table slot. */
	size_t slot = -reason;

	if (slot >= ARRAY_SIZE(iwcm_rej_reason_strs))
		return "unrecognized reason";
	if (!iwcm_rej_reason_strs[slot])
		return "unrecognized reason";

	return iwcm_rej_reason_strs[slot];
}
EXPORT_SYMBOL(iwcm_reject_msg);
82
83 static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = {
84         [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
85         [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
86         [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
87         [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
88         [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
89         [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
90         [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb},
91         [RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb}
92 };
93
94 static struct workqueue_struct *iwcm_wq;
95 struct iwcm_work {
96         struct work_struct work;
97         struct iwcm_id_private *cm_id;
98         struct list_head list;
99         struct iw_cm_event event;
100         struct list_head free_list;
101 };
102
103 static unsigned int default_backlog = 256;
104
105 static struct ctl_table_header *iwcm_ctl_table_hdr;
106 static struct ctl_table iwcm_ctl_table[] = {
107         {
108                 .procname       = "default_backlog",
109                 .data           = &default_backlog,
110                 .maxlen         = sizeof(default_backlog),
111                 .mode           = 0644,
112                 .proc_handler   = proc_dointvec,
113         },
114         { }
115 };
116
117 /*
118  * The following services provide a mechanism for pre-allocating iwcm_work
119  * elements.  The design pre-allocates them  based on the cm_id type:
120  *      LISTENING IDS:  Get enough elements preallocated to handle the
121  *                      listen backlog.
122  *      ACTIVE IDS:     4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
123  *      PASSIVE IDS:    3: ESTABLISHED, DISCONNECT, CLOSE
124  *
125  * Allocating them in connect and listen avoids having to deal
126  * with allocation failures on the event upcall from the provider (which
127  * is called in the interrupt context).
128  *
129  * One exception is when creating the cm_id for incoming connection requests.
130  * There are two cases:
131  * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
132  *    the backlog is exceeded, then no more connection request events will
133  *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's up
134  *    to the provider to reject the connection request.
135  * 2) in the connection request workqueue handler, cm_conn_req_handler().
136  *    If work elements cannot be allocated for the new connect request cm_id,
137  *    then IWCM will call the provider reject method.  This is ok since
138  *    cm_conn_req_handler() runs in the workqueue thread context.
139  */
140
141 static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
142 {
143         struct iwcm_work *work;
144
145         if (list_empty(&cm_id_priv->work_free_list))
146                 return NULL;
147         work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
148                           free_list);
149         list_del_init(&work->free_list);
150         return work;
151 }
152
153 static void put_work(struct iwcm_work *work)
154 {
155         list_add(&work->free_list, &work->cm_id->work_free_list);
156 }
157
158 static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
159 {
160         struct list_head *e, *tmp;
161
162         list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) {
163                 list_del(e);
164                 kfree(list_entry(e, struct iwcm_work, free_list));
165         }
166 }
167
168 static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
169 {
170         struct iwcm_work *work;
171
172         BUG_ON(!list_empty(&cm_id_priv->work_free_list));
173         while (count--) {
174                 work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
175                 if (!work) {
176                         dealloc_work_entries(cm_id_priv);
177                         return -ENOMEM;
178                 }
179                 work->cm_id = cm_id_priv;
180                 INIT_LIST_HEAD(&work->list);
181                 put_work(work);
182         }
183         return 0;
184 }
185
186 /*
187  * Save private data from incoming connection requests to
188  * iw_cm_event, so the low level driver doesn't have to. Adjust
189  * the event ptr to point to the local copy.
190  */
191 static int copy_private_data(struct iw_cm_event *event)
192 {
193         void *p;
194
195         p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
196         if (!p)
197                 return -ENOMEM;
198         event->private_data = p;
199         return 0;
200 }
201
/* Free the cm_id's unused work elements, then the cm_id itself. */
static void free_cm_id(struct iwcm_id_private *cm_id_priv)
{
	dealloc_work_entries(cm_id_priv);

	kfree(cm_id_priv);
}
207
208 /*
209  * Release a reference on cm_id. If the last reference is being
210  * released, free the cm_id and return 1.
211  */
212 static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
213 {
214         BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
215         if (atomic_dec_and_test(&cm_id_priv->refcount)) {
216                 BUG_ON(!list_empty(&cm_id_priv->work_list));
217                 free_cm_id(cm_id_priv);
218                 return 1;
219         }
220
221         return 0;
222 }
223
224 static void add_ref(struct iw_cm_id *cm_id)
225 {
226         struct iwcm_id_private *cm_id_priv;
227         cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
228         atomic_inc(&cm_id_priv->refcount);
229 }
230
231 static void rem_ref(struct iw_cm_id *cm_id)
232 {
233         struct iwcm_id_private *cm_id_priv;
234
235         cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
236
237         (void)iwcm_deref_id(cm_id_priv);
238 }
239
240 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
241
242 struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
243                                  iw_cm_handler cm_handler,
244                                  void *context)
245 {
246         struct iwcm_id_private *cm_id_priv;
247
248         cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
249         if (!cm_id_priv)
250                 return ERR_PTR(-ENOMEM);
251
252         cm_id_priv->state = IW_CM_STATE_IDLE;
253         cm_id_priv->id.device = device;
254         cm_id_priv->id.cm_handler = cm_handler;
255         cm_id_priv->id.context = context;
256         cm_id_priv->id.event_handler = cm_event_handler;
257         cm_id_priv->id.add_ref = add_ref;
258         cm_id_priv->id.rem_ref = rem_ref;
259         spin_lock_init(&cm_id_priv->lock);
260         atomic_set(&cm_id_priv->refcount, 1);
261         init_waitqueue_head(&cm_id_priv->connect_wait);
262         init_completion(&cm_id_priv->destroy_comp);
263         INIT_LIST_HEAD(&cm_id_priv->work_list);
264         INIT_LIST_HEAD(&cm_id_priv->work_free_list);
265
266         return &cm_id_priv->id;
267 }
268 EXPORT_SYMBOL(iw_create_cm_id);
269
270
271 static int iwcm_modify_qp_err(struct ib_qp *qp)
272 {
273         struct ib_qp_attr qp_attr;
274
275         if (!qp)
276                 return -EINVAL;
277
278         qp_attr.qp_state = IB_QPS_ERR;
279         return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
280 }
281
282 /*
283  * This is really the RDMAC CLOSING state. It is most similar to the
284  * IB SQD QP state.
285  */
286 static int iwcm_modify_qp_sqd(struct ib_qp *qp)
287 {
288         struct ib_qp_attr qp_attr;
289
290         BUG_ON(qp == NULL);
291         qp_attr.qp_state = IB_QPS_SQD;
292         return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
293 }
294
295 /*
296  * CM_ID <-- CLOSING
297  *
298  * Block if a passive or active connection is currently being processed. Then
299  * process the event as follows:
300  * - If we are ESTABLISHED, move to CLOSING and modify the QP state
301  *   based on the abrupt flag
302  * - If the connection is already in the CLOSING or IDLE state, the peer is
303  *   disconnecting concurrently with us and we've already seen the
304  *   DISCONNECT event -- ignore the request and return 0
305  * - Disconnect on a listening endpoint returns -EINVAL
306  */
307 int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
308 {
309         struct iwcm_id_private *cm_id_priv;
310         unsigned long flags;
311         int ret = 0;
312         struct ib_qp *qp = NULL;
313
314         cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
315         /* Wait if we're currently in a connect or accept downcall */
316         wait_event(cm_id_priv->connect_wait,
317                    !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
318
319         spin_lock_irqsave(&cm_id_priv->lock, flags);
320         switch (cm_id_priv->state) {
321         case IW_CM_STATE_ESTABLISHED:
322                 cm_id_priv->state = IW_CM_STATE_CLOSING;
323
324                 /* QP could be <nul> for user-mode client */
325                 if (cm_id_priv->qp)
326                         qp = cm_id_priv->qp;
327                 else
328                         ret = -EINVAL;
329                 break;
330         case IW_CM_STATE_LISTEN:
331                 ret = -EINVAL;
332                 break;
333         case IW_CM_STATE_CLOSING:
334                 /* remote peer closed first */
335         case IW_CM_STATE_IDLE:
336                 /* accept or connect returned !0 */
337                 break;
338         case IW_CM_STATE_CONN_RECV:
339                 /*
340                  * App called disconnect before/without calling accept after
341                  * connect_request event delivered.
342                  */
343                 break;
344         case IW_CM_STATE_CONN_SENT:
345                 /* Can only get here if wait above fails */
346         default:
347                 BUG();
348         }
349         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
350
351         if (qp) {
352                 if (abrupt)
353                         ret = iwcm_modify_qp_err(qp);
354                 else
355                         ret = iwcm_modify_qp_sqd(qp);
356
357                 /*
358                  * If both sides are disconnecting the QP could
359                  * already be in ERR or SQD states
360                  */
361                 ret = 0;
362         }
363
364         return ret;
365 }
366 EXPORT_SYMBOL(iw_cm_disconnect);
367
368 /*
369  * CM_ID <-- DESTROYING
370  *
371  * Clean up all resources associated with the connection and release
372  * the initial reference taken by iw_create_cm_id.
373  */
374 static void destroy_cm_id(struct iw_cm_id *cm_id)
375 {
376         struct iwcm_id_private *cm_id_priv;
377         struct ib_qp *qp;
378         unsigned long flags;
379
380         cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
381         /*
382          * Wait if we're currently in a connect or accept downcall. A
383          * listening endpoint should never block here.
384          */
385         wait_event(cm_id_priv->connect_wait,
386                    !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
387
388         /*
389          * Since we're deleting the cm_id, drop any events that
390          * might arrive before the last dereference.
391          */
392         set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
393
394         spin_lock_irqsave(&cm_id_priv->lock, flags);
395         qp = cm_id_priv->qp;
396         cm_id_priv->qp = NULL;
397
398         switch (cm_id_priv->state) {
399         case IW_CM_STATE_LISTEN:
400                 cm_id_priv->state = IW_CM_STATE_DESTROYING;
401                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
402                 /* destroy the listening endpoint */
403                 cm_id->device->ops.iw_destroy_listen(cm_id);
404                 spin_lock_irqsave(&cm_id_priv->lock, flags);
405                 break;
406         case IW_CM_STATE_ESTABLISHED:
407                 cm_id_priv->state = IW_CM_STATE_DESTROYING;
408                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
409                 /* Abrupt close of the connection */
410                 (void)iwcm_modify_qp_err(qp);
411                 spin_lock_irqsave(&cm_id_priv->lock, flags);
412                 break;
413         case IW_CM_STATE_IDLE:
414         case IW_CM_STATE_CLOSING:
415                 cm_id_priv->state = IW_CM_STATE_DESTROYING;
416                 break;
417         case IW_CM_STATE_CONN_RECV:
418                 /*
419                  * App called destroy before/without calling accept after
420                  * receiving connection request event notification or
421                  * returned non zero from the event callback function.
422                  * In either case, must tell the provider to reject.
423                  */
424                 cm_id_priv->state = IW_CM_STATE_DESTROYING;
425                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
426                 cm_id->device->ops.iw_reject(cm_id, NULL, 0);
427                 spin_lock_irqsave(&cm_id_priv->lock, flags);
428                 break;
429         case IW_CM_STATE_CONN_SENT:
430         case IW_CM_STATE_DESTROYING:
431         default:
432                 BUG();
433                 break;
434         }
435         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
436         if (qp)
437                 cm_id_priv->id.device->ops.iw_rem_ref(qp);
438
439         if (cm_id->mapped) {
440                 iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
441                 iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
442         }
443
444         (void)iwcm_deref_id(cm_id_priv);
445 }
446
/*
 * Exported entry point for destroying a cm_id; all of the work is done
 * by destroy_cm_id().  The cm_id memory is freed once the last reference
 * is dropped (see iwcm_deref_id()).
 *
 * This function is only called by the application thread and cannot
 * be called by the event thread.
 */
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
	destroy_cm_id(cm_id);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
458
459 /**
460  * iw_cm_check_wildcard - If IP address is 0 then use original
461  * @pm_addr: sockaddr containing the ip to check for wildcard
462  * @cm_addr: sockaddr containing the actual IP address
463  * @cm_outaddr: sockaddr to set IP addr which leaving port
464  *
465  *  Checks the pm_addr for wildcard and then sets cm_outaddr's
466  *  IP to the actual (cm_addr).
467  */
468 static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr,
469                                  struct sockaddr_storage *cm_addr,
470                                  struct sockaddr_storage *cm_outaddr)
471 {
472         if (pm_addr->ss_family == AF_INET) {
473                 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr;
474
475                 if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) {
476                         struct sockaddr_in *cm4_addr =
477                                 (struct sockaddr_in *)cm_addr;
478                         struct sockaddr_in *cm4_outaddr =
479                                 (struct sockaddr_in *)cm_outaddr;
480
481                         cm4_outaddr->sin_addr = cm4_addr->sin_addr;
482                 }
483         } else {
484                 struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr;
485
486                 if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) {
487                         struct sockaddr_in6 *cm6_addr =
488                                 (struct sockaddr_in6 *)cm_addr;
489                         struct sockaddr_in6 *cm6_outaddr =
490                                 (struct sockaddr_in6 *)cm_outaddr;
491
492                         cm6_outaddr->sin6_addr = cm6_addr->sin6_addr;
493                 }
494         }
495 }
496
497 /**
498  * iw_cm_map - Use portmapper to map the ports
499  * @cm_id: connection manager pointer
500  * @active: Indicates the active side when true
501  * returns nonzero for error only if iwpm_create_mapinfo() fails
502  *
503  * Tries to add a mapping for a port using the Portmapper. If
504  * successful in mapping the IP/Port it will check the remote
505  * mapped IP address for a wildcard IP address and replace the
506  * zero IP address with the remote_addr.
507  */
508 static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
509 {
510         const char *devname = dev_name(&cm_id->device->dev);
511         const char *ifname = cm_id->device->iw_ifname;
512         struct iwpm_dev_data pm_reg_msg = {};
513         struct iwpm_sa_data pm_msg;
514         int status;
515
516         if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) ||
517             strlen(ifname) >= sizeof(pm_reg_msg.if_name))
518                 return -EINVAL;
519
520         cm_id->m_local_addr = cm_id->local_addr;
521         cm_id->m_remote_addr = cm_id->remote_addr;
522
523         strcpy(pm_reg_msg.dev_name, devname);
524         strcpy(pm_reg_msg.if_name, ifname);
525
526         if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) ||
527             !iwpm_valid_pid())
528                 return 0;
529
530         cm_id->mapped = true;
531         pm_msg.loc_addr = cm_id->local_addr;
532         pm_msg.rem_addr = cm_id->remote_addr;
533         pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ?
534                        IWPM_FLAGS_NO_PORT_MAP : 0;
535         if (active)
536                 status = iwpm_add_and_query_mapping(&pm_msg,
537                                                     RDMA_NL_IWCM);
538         else
539                 status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM);
540
541         if (!status) {
542                 cm_id->m_local_addr = pm_msg.mapped_loc_addr;
543                 if (active) {
544                         cm_id->m_remote_addr = pm_msg.mapped_rem_addr;
545                         iw_cm_check_wildcard(&pm_msg.mapped_rem_addr,
546                                              &cm_id->remote_addr,
547                                              &cm_id->m_remote_addr);
548                 }
549         }
550
551         return iwpm_create_mapinfo(&cm_id->local_addr,
552                                    &cm_id->m_local_addr,
553                                    RDMA_NL_IWCM, pm_msg.flags);
554 }
555
556 /*
557  * CM_ID <-- LISTEN
558  *
559  * Start listening for connect requests. Generates one CONNECT_REQUEST
560  * event for each inbound connect request.
561  */
562 int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
563 {
564         struct iwcm_id_private *cm_id_priv;
565         unsigned long flags;
566         int ret;
567
568         cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
569
570         if (!backlog)
571                 backlog = default_backlog;
572
573         ret = alloc_work_entries(cm_id_priv, backlog);
574         if (ret)
575                 return ret;
576
577         spin_lock_irqsave(&cm_id_priv->lock, flags);
578         switch (cm_id_priv->state) {
579         case IW_CM_STATE_IDLE:
580                 cm_id_priv->state = IW_CM_STATE_LISTEN;
581                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
582                 ret = iw_cm_map(cm_id, false);
583                 if (!ret)
584                         ret = cm_id->device->ops.iw_create_listen(cm_id,
585                                                                   backlog);
586                 if (ret)
587                         cm_id_priv->state = IW_CM_STATE_IDLE;
588                 spin_lock_irqsave(&cm_id_priv->lock, flags);
589                 break;
590         default:
591                 ret = -EINVAL;
592         }
593         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
594
595         return ret;
596 }
597 EXPORT_SYMBOL(iw_cm_listen);
598
599 /*
600  * CM_ID <-- IDLE
601  *
602  * Rejects an inbound connection request. No events are generated.
603  */
604 int iw_cm_reject(struct iw_cm_id *cm_id,
605                  const void *private_data,
606                  u8 private_data_len)
607 {
608         struct iwcm_id_private *cm_id_priv;
609         unsigned long flags;
610         int ret;
611
612         cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
613         set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
614
615         spin_lock_irqsave(&cm_id_priv->lock, flags);
616         if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
617                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
618                 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
619                 wake_up_all(&cm_id_priv->connect_wait);
620                 return -EINVAL;
621         }
622         cm_id_priv->state = IW_CM_STATE_IDLE;
623         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
624
625         ret = cm_id->device->ops.iw_reject(cm_id, private_data,
626                                           private_data_len);
627
628         clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
629         wake_up_all(&cm_id_priv->connect_wait);
630
631         return ret;
632 }
633 EXPORT_SYMBOL(iw_cm_reject);
634
635 /*
636  * CM_ID <-- ESTABLISHED
637  *
638  * Accepts an inbound connection request and generates an ESTABLISHED
639  * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
640  * until the ESTABLISHED event is received from the provider.
641  */
642 int iw_cm_accept(struct iw_cm_id *cm_id,
643                  struct iw_cm_conn_param *iw_param)
644 {
645         struct iwcm_id_private *cm_id_priv;
646         struct ib_qp *qp;
647         unsigned long flags;
648         int ret;
649
650         cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
651         set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
652
653         spin_lock_irqsave(&cm_id_priv->lock, flags);
654         if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
655                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
656                 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
657                 wake_up_all(&cm_id_priv->connect_wait);
658                 return -EINVAL;
659         }
660         /* Get the ib_qp given the QPN */
661         qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn);
662         if (!qp) {
663                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
664                 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
665                 wake_up_all(&cm_id_priv->connect_wait);
666                 return -EINVAL;
667         }
668         cm_id->device->ops.iw_add_ref(qp);
669         cm_id_priv->qp = qp;
670         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
671
672         ret = cm_id->device->ops.iw_accept(cm_id, iw_param);
673         if (ret) {
674                 /* An error on accept precludes provider events */
675                 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
676                 cm_id_priv->state = IW_CM_STATE_IDLE;
677                 spin_lock_irqsave(&cm_id_priv->lock, flags);
678                 qp = cm_id_priv->qp;
679                 cm_id_priv->qp = NULL;
680                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
681                 if (qp)
682                         cm_id->device->ops.iw_rem_ref(qp);
683                 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
684                 wake_up_all(&cm_id_priv->connect_wait);
685         }
686
687         return ret;
688 }
689 EXPORT_SYMBOL(iw_cm_accept);
690
691 /*
692  * Active Side: CM_ID <-- CONN_SENT
693  *
694  * If successful, results in the generation of a CONNECT_REPLY
695  * event. iw_cm_disconnect and iw_cm_destroy will block until the
696  * CONNECT_REPLY event is received from the provider.
697  */
698 int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
699 {
700         struct iwcm_id_private *cm_id_priv;
701         int ret;
702         unsigned long flags;
703         struct ib_qp *qp = NULL;
704
705         cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
706
707         ret = alloc_work_entries(cm_id_priv, 4);
708         if (ret)
709                 return ret;
710
711         set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
712         spin_lock_irqsave(&cm_id_priv->lock, flags);
713
714         if (cm_id_priv->state != IW_CM_STATE_IDLE) {
715                 ret = -EINVAL;
716                 goto err;
717         }
718
719         /* Get the ib_qp given the QPN */
720         qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn);
721         if (!qp) {
722                 ret = -EINVAL;
723                 goto err;
724         }
725         cm_id->device->ops.iw_add_ref(qp);
726         cm_id_priv->qp = qp;
727         cm_id_priv->state = IW_CM_STATE_CONN_SENT;
728         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
729
730         ret = iw_cm_map(cm_id, true);
731         if (!ret)
732                 ret = cm_id->device->ops.iw_connect(cm_id, iw_param);
733         if (!ret)
734                 return 0;       /* success */
735
736         spin_lock_irqsave(&cm_id_priv->lock, flags);
737         qp = cm_id_priv->qp;
738         cm_id_priv->qp = NULL;
739         cm_id_priv->state = IW_CM_STATE_IDLE;
740 err:
741         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
742         if (qp)
743                 cm_id->device->ops.iw_rem_ref(qp);
744         clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
745         wake_up_all(&cm_id_priv->connect_wait);
746         return ret;
747 }
748 EXPORT_SYMBOL(iw_cm_connect);
749
750 /*
751  * Passive Side: new CM_ID <-- CONN_RECV
752  *
753  * Handles an inbound connect request. The function creates a new
754  * iw_cm_id to represent the new connection and inherits the client
755  * callback function and other attributes from the listening parent.
756  *
757  * The work item contains a pointer to the listen_cm_id and the event. The
758  * listen_cm_id contains the client cm_handler, context and
759  * device. These are copied when the device is cloned. The event
760  * contains the new four tuple.
761  *
762  * An error on the child should not affect the parent, so this
763  * function does not return a value.
764  */
765 static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
766                                 struct iw_cm_event *iw_event)
767 {
768         unsigned long flags;
769         struct iw_cm_id *cm_id;
770         struct iwcm_id_private *cm_id_priv;
771         int ret;
772
773         /*
774          * The provider should never generate a connection request
775          * event with a bad status.
776          */
777         BUG_ON(iw_event->status);
778
779         cm_id = iw_create_cm_id(listen_id_priv->id.device,
780                                 listen_id_priv->id.cm_handler,
781                                 listen_id_priv->id.context);
782         /* If the cm_id could not be created, ignore the request */
783         if (IS_ERR(cm_id))
784                 goto out;
785
786         cm_id->provider_data = iw_event->provider_data;
787         cm_id->m_local_addr = iw_event->local_addr;
788         cm_id->m_remote_addr = iw_event->remote_addr;
789         cm_id->local_addr = listen_id_priv->id.local_addr;
790
791         ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr,
792                                    &iw_event->remote_addr,
793                                    &cm_id->remote_addr,
794                                    RDMA_NL_IWCM);
795         if (ret) {
796                 cm_id->remote_addr = iw_event->remote_addr;
797         } else {
798                 iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr,
799                                      &iw_event->local_addr,
800                                      &cm_id->local_addr);
801                 iw_event->local_addr = cm_id->local_addr;
802                 iw_event->remote_addr = cm_id->remote_addr;
803         }
804
805         cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
806         cm_id_priv->state = IW_CM_STATE_CONN_RECV;
807
808         /*
809          * We could be destroying the listening id. If so, ignore this
810          * upcall.
811          */
812         spin_lock_irqsave(&listen_id_priv->lock, flags);
813         if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
814                 spin_unlock_irqrestore(&listen_id_priv->lock, flags);
815                 iw_cm_reject(cm_id, NULL, 0);
816                 iw_destroy_cm_id(cm_id);
817                 goto out;
818         }
819         spin_unlock_irqrestore(&listen_id_priv->lock, flags);
820
821         ret = alloc_work_entries(cm_id_priv, 3);
822         if (ret) {
823                 iw_cm_reject(cm_id, NULL, 0);
824                 iw_destroy_cm_id(cm_id);
825                 goto out;
826         }
827
828         /* Call the client CM handler */
829         ret = cm_id->cm_handler(cm_id, iw_event);
830         if (ret) {
831                 iw_cm_reject(cm_id, NULL, 0);
832                 iw_destroy_cm_id(cm_id);
833         }
834
835 out:
836         if (iw_event->private_data_len)
837                 kfree(iw_event->private_data);
838 }
839
840 /*
841  * Passive Side: CM_ID <-- ESTABLISHED
842  *
843  * The provider generated an ESTABLISHED event which means that
844  * the MPA negotion has completed successfully and we are now in MPA
845  * FPDU mode.
846  *
847  * This event can only be received in the CONN_RECV state. If the
848  * remote peer closed, the ESTABLISHED event would be received followed
849  * by the CLOSE event. If the app closes, it will block until we wake
850  * it up after processing this event.
851  */
852 static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
853                                struct iw_cm_event *iw_event)
854 {
855         unsigned long flags;
856         int ret;
857
858         spin_lock_irqsave(&cm_id_priv->lock, flags);
859
860         /*
861          * We clear the CONNECT_WAIT bit here to allow the callback
862          * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
863          * from a callback handler is not allowed.
864          */
865         clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
866         BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
867         cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
868         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
869         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
870         wake_up_all(&cm_id_priv->connect_wait);
871
872         return ret;
873 }
874
875 /*
876  * Active Side: CM_ID <-- ESTABLISHED
877  *
878  * The app has called connect and is waiting for the established event to
879  * post it's requests to the server. This event will wake up anyone
880  * blocked in iw_cm_disconnect or iw_destroy_id.
881  */
882 static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
883                                struct iw_cm_event *iw_event)
884 {
885         struct ib_qp *qp = NULL;
886         unsigned long flags;
887         int ret;
888
889         spin_lock_irqsave(&cm_id_priv->lock, flags);
890         /*
891          * Clear the connect wait bit so a callback function calling
892          * iw_cm_disconnect will not wait and deadlock this thread
893          */
894         clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
895         BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
896         if (iw_event->status == 0) {
897                 cm_id_priv->id.m_local_addr = iw_event->local_addr;
898                 cm_id_priv->id.m_remote_addr = iw_event->remote_addr;
899                 iw_event->local_addr = cm_id_priv->id.local_addr;
900                 iw_event->remote_addr = cm_id_priv->id.remote_addr;
901                 cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
902         } else {
903                 /* REJECTED or RESET */
904                 qp = cm_id_priv->qp;
905                 cm_id_priv->qp = NULL;
906                 cm_id_priv->state = IW_CM_STATE_IDLE;
907         }
908         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
909         if (qp)
910                 cm_id_priv->id.device->ops.iw_rem_ref(qp);
911         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
912
913         if (iw_event->private_data_len)
914                 kfree(iw_event->private_data);
915
916         /* Wake up waiters on connect complete */
917         wake_up_all(&cm_id_priv->connect_wait);
918
919         return ret;
920 }
921
922 /*
923  * CM_ID <-- CLOSING
924  *
925  * If in the ESTABLISHED state, move to CLOSING.
926  */
927 static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
928                                   struct iw_cm_event *iw_event)
929 {
930         unsigned long flags;
931
932         spin_lock_irqsave(&cm_id_priv->lock, flags);
933         if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
934                 cm_id_priv->state = IW_CM_STATE_CLOSING;
935         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
936 }
937
938 /*
939  * CM_ID <-- IDLE
940  *
941  * If in the ESTBLISHED or CLOSING states, the QP will have have been
942  * moved by the provider to the ERR state. Disassociate the CM_ID from
943  * the QP,  move to IDLE, and remove the 'connected' reference.
944  *
945  * If in some other state, the cm_id was destroyed asynchronously.
946  * This is the last reference that will result in waking up
947  * the app thread blocked in iw_destroy_cm_id.
948  */
949 static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
950                                   struct iw_cm_event *iw_event)
951 {
952         struct ib_qp *qp;
953         unsigned long flags;
954         int ret = 0, notify_event = 0;
955         spin_lock_irqsave(&cm_id_priv->lock, flags);
956         qp = cm_id_priv->qp;
957         cm_id_priv->qp = NULL;
958
959         switch (cm_id_priv->state) {
960         case IW_CM_STATE_ESTABLISHED:
961         case IW_CM_STATE_CLOSING:
962                 cm_id_priv->state = IW_CM_STATE_IDLE;
963                 notify_event = 1;
964                 break;
965         case IW_CM_STATE_DESTROYING:
966                 break;
967         default:
968                 BUG();
969         }
970         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
971
972         if (qp)
973                 cm_id_priv->id.device->ops.iw_rem_ref(qp);
974         if (notify_event)
975                 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
976         return ret;
977 }
978
979 static int process_event(struct iwcm_id_private *cm_id_priv,
980                          struct iw_cm_event *iw_event)
981 {
982         int ret = 0;
983
984         switch (iw_event->event) {
985         case IW_CM_EVENT_CONNECT_REQUEST:
986                 cm_conn_req_handler(cm_id_priv, iw_event);
987                 break;
988         case IW_CM_EVENT_CONNECT_REPLY:
989                 ret = cm_conn_rep_handler(cm_id_priv, iw_event);
990                 break;
991         case IW_CM_EVENT_ESTABLISHED:
992                 ret = cm_conn_est_handler(cm_id_priv, iw_event);
993                 break;
994         case IW_CM_EVENT_DISCONNECT:
995                 cm_disconnect_handler(cm_id_priv, iw_event);
996                 break;
997         case IW_CM_EVENT_CLOSE:
998                 ret = cm_close_handler(cm_id_priv, iw_event);
999                 break;
1000         default:
1001                 BUG();
1002         }
1003
1004         return ret;
1005 }
1006
1007 /*
1008  * Process events on the work_list for the cm_id. If the callback
1009  * function requests that the cm_id be deleted, a flag is set in the
1010  * cm_id flags to indicate that when the last reference is
1011  * removed, the cm_id is to be destroyed. This is necessary to
1012  * distinguish between an object that will be destroyed by the app
1013  * thread asleep on the destroy_comp list vs. an object destroyed
1014  * here synchronously when the last reference is removed.
1015  */
1016 static void cm_work_handler(struct work_struct *_work)
1017 {
1018         struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
1019         struct iw_cm_event levent;
1020         struct iwcm_id_private *cm_id_priv = work->cm_id;
1021         unsigned long flags;
1022         int empty;
1023         int ret = 0;
1024
1025         spin_lock_irqsave(&cm_id_priv->lock, flags);
1026         empty = list_empty(&cm_id_priv->work_list);
1027         while (!empty) {
1028                 work = list_entry(cm_id_priv->work_list.next,
1029                                   struct iwcm_work, list);
1030                 list_del_init(&work->list);
1031                 empty = list_empty(&cm_id_priv->work_list);
1032                 levent = work->event;
1033                 put_work(work);
1034                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1035
1036                 if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
1037                         ret = process_event(cm_id_priv, &levent);
1038                         if (ret)
1039                                 destroy_cm_id(&cm_id_priv->id);
1040                 } else
1041                         pr_debug("dropping event %d\n", levent.event);
1042                 if (iwcm_deref_id(cm_id_priv))
1043                         return;
1044                 if (empty)
1045                         return;
1046                 spin_lock_irqsave(&cm_id_priv->lock, flags);
1047         }
1048         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1049 }
1050
1051 /*
1052  * This function is called on interrupt context. Schedule events on
1053  * the iwcm_wq thread to allow callback functions to downcall into
1054  * the CM and/or block.  Events are queued to a per-CM_ID
1055  * work_list. If this is the first event on the work_list, the work
1056  * element is also queued on the iwcm_wq thread.
1057  *
1058  * Each event holds a reference on the cm_id. Until the last posted
1059  * event has been delivered and processed, the cm_id cannot be
1060  * deleted.
1061  *
1062  * Returns:
1063  *            0 - the event was handled.
1064  *      -ENOMEM - the event was not handled due to lack of resources.
1065  */
1066 static int cm_event_handler(struct iw_cm_id *cm_id,
1067                              struct iw_cm_event *iw_event)
1068 {
1069         struct iwcm_work *work;
1070         struct iwcm_id_private *cm_id_priv;
1071         unsigned long flags;
1072         int ret = 0;
1073
1074         cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
1075
1076         spin_lock_irqsave(&cm_id_priv->lock, flags);
1077         work = get_work(cm_id_priv);
1078         if (!work) {
1079                 ret = -ENOMEM;
1080                 goto out;
1081         }
1082
1083         INIT_WORK(&work->work, cm_work_handler);
1084         work->cm_id = cm_id_priv;
1085         work->event = *iw_event;
1086
1087         if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
1088              work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
1089             work->event.private_data_len) {
1090                 ret = copy_private_data(&work->event);
1091                 if (ret) {
1092                         put_work(work);
1093                         goto out;
1094                 }
1095         }
1096
1097         atomic_inc(&cm_id_priv->refcount);
1098         if (list_empty(&cm_id_priv->work_list)) {
1099                 list_add_tail(&work->list, &cm_id_priv->work_list);
1100                 queue_work(iwcm_wq, &work->work);
1101         } else
1102                 list_add_tail(&work->list, &cm_id_priv->work_list);
1103 out:
1104         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1105         return ret;
1106 }
1107
1108 static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
1109                                   struct ib_qp_attr *qp_attr,
1110                                   int *qp_attr_mask)
1111 {
1112         unsigned long flags;
1113         int ret;
1114
1115         spin_lock_irqsave(&cm_id_priv->lock, flags);
1116         switch (cm_id_priv->state) {
1117         case IW_CM_STATE_IDLE:
1118         case IW_CM_STATE_CONN_SENT:
1119         case IW_CM_STATE_CONN_RECV:
1120         case IW_CM_STATE_ESTABLISHED:
1121                 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
1122                 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE|
1123                                            IB_ACCESS_REMOTE_READ;
1124                 ret = 0;
1125                 break;
1126         default:
1127                 ret = -EINVAL;
1128                 break;
1129         }
1130         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1131         return ret;
1132 }
1133
1134 static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
1135                                   struct ib_qp_attr *qp_attr,
1136                                   int *qp_attr_mask)
1137 {
1138         unsigned long flags;
1139         int ret;
1140
1141         spin_lock_irqsave(&cm_id_priv->lock, flags);
1142         switch (cm_id_priv->state) {
1143         case IW_CM_STATE_IDLE:
1144         case IW_CM_STATE_CONN_SENT:
1145         case IW_CM_STATE_CONN_RECV:
1146         case IW_CM_STATE_ESTABLISHED:
1147                 *qp_attr_mask = 0;
1148                 ret = 0;
1149                 break;
1150         default:
1151                 ret = -EINVAL;
1152                 break;
1153         }
1154         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1155         return ret;
1156 }
1157
1158 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
1159                        struct ib_qp_attr *qp_attr,
1160                        int *qp_attr_mask)
1161 {
1162         struct iwcm_id_private *cm_id_priv;
1163         int ret;
1164
1165         cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
1166         switch (qp_attr->qp_state) {
1167         case IB_QPS_INIT:
1168         case IB_QPS_RTR:
1169                 ret = iwcm_init_qp_init_attr(cm_id_priv,
1170                                              qp_attr, qp_attr_mask);
1171                 break;
1172         case IB_QPS_RTS:
1173                 ret = iwcm_init_qp_rts_attr(cm_id_priv,
1174                                             qp_attr, qp_attr_mask);
1175                 break;
1176         default:
1177                 ret = -EINVAL;
1178                 break;
1179         }
1180         return ret;
1181 }
1182 EXPORT_SYMBOL(iw_cm_init_qp_attr);
1183
1184 static int __init iw_cm_init(void)
1185 {
1186         int ret;
1187
1188         ret = iwpm_init(RDMA_NL_IWCM);
1189         if (ret)
1190                 pr_err("iw_cm: couldn't init iwpm\n");
1191         else
1192                 rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
1193         iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0);
1194         if (!iwcm_wq)
1195                 return -ENOMEM;
1196
1197         iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
1198                                                  iwcm_ctl_table);
1199         if (!iwcm_ctl_table_hdr) {
1200                 pr_err("iw_cm: couldn't register sysctl paths\n");
1201                 destroy_workqueue(iwcm_wq);
1202                 return -ENOMEM;
1203         }
1204
1205         return 0;
1206 }
1207
1208 static void __exit iw_cm_cleanup(void)
1209 {
1210         unregister_net_sysctl_table(iwcm_ctl_table_hdr);
1211         destroy_workqueue(iwcm_wq);
1212         rdma_nl_unregister(RDMA_NL_IWCM);
1213         iwpm_exit(RDMA_NL_IWCM);
1214 }
1215
1216 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2);
1217
1218 module_init(iw_cm_init);
1219 module_exit(iw_cm_cleanup);