/* drivers/infiniband/core/cm.c */
/*
 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/etherdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

static const char * const ibcm_rej_reason_strs[] = {
        [IB_CM_REJ_NO_QP]                       = "no QP",
        [IB_CM_REJ_NO_EEC]                      = "no EEC",
        [IB_CM_REJ_NO_RESOURCES]                = "no resources",
        [IB_CM_REJ_TIMEOUT]                     = "timeout",
        [IB_CM_REJ_UNSUPPORTED]                 = "unsupported",
        [IB_CM_REJ_INVALID_COMM_ID]             = "invalid comm ID",
        [IB_CM_REJ_INVALID_COMM_INSTANCE]       = "invalid comm instance",
        [IB_CM_REJ_INVALID_SERVICE_ID]          = "invalid service ID",
        [IB_CM_REJ_INVALID_TRANSPORT_TYPE]      = "invalid transport type",
        [IB_CM_REJ_STALE_CONN]                  = "stale conn",
        [IB_CM_REJ_RDC_NOT_EXIST]               = "RDC not exist",
        [IB_CM_REJ_INVALID_GID]                 = "invalid GID",
        [IB_CM_REJ_INVALID_LID]                 = "invalid LID",
        [IB_CM_REJ_INVALID_SL]                  = "invalid SL",
        [IB_CM_REJ_INVALID_TRAFFIC_CLASS]       = "invalid traffic class",
        [IB_CM_REJ_INVALID_HOP_LIMIT]           = "invalid hop limit",
        [IB_CM_REJ_INVALID_PACKET_RATE]         = "invalid packet rate",
        [IB_CM_REJ_INVALID_ALT_GID]             = "invalid alt GID",
        [IB_CM_REJ_INVALID_ALT_LID]             = "invalid alt LID",
        [IB_CM_REJ_INVALID_ALT_SL]              = "invalid alt SL",
        [IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]   = "invalid alt traffic class",
        [IB_CM_REJ_INVALID_ALT_HOP_LIMIT]       = "invalid alt hop limit",
        [IB_CM_REJ_INVALID_ALT_PACKET_RATE]     = "invalid alt packet rate",
        [IB_CM_REJ_PORT_CM_REDIRECT]            = "port CM redirect",
        [IB_CM_REJ_PORT_REDIRECT]               = "port redirect",
        [IB_CM_REJ_INVALID_MTU]                 = "invalid MTU",
        [IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES] = "insufficient resp resources",
        [IB_CM_REJ_CONSUMER_DEFINED]            = "consumer defined",
        [IB_CM_REJ_INVALID_RNR_RETRY]           = "invalid RNR retry",
        [IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]     = "duplicate local comm ID",
        [IB_CM_REJ_INVALID_CLASS_VERSION]       = "invalid class version",
        [IB_CM_REJ_INVALID_FLOW_LABEL]          = "invalid flow label",
        [IB_CM_REJ_INVALID_ALT_FLOW_LABEL]      = "invalid alt flow label",
};

const char *__attribute_const__ ibcm_reject_msg(int reason)
{
        size_t index = reason;

        if (index < ARRAY_SIZE(ibcm_rej_reason_strs) &&
            ibcm_rej_reason_strs[index])
                return ibcm_rej_reason_strs[index];
        else
                return "unrecognized reason";
}
EXPORT_SYMBOL(ibcm_reject_msg);

static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cm_client = {
        .name   = "cm",
        .add    = cm_add_one,
        .remove = cm_remove_one
};

static struct ib_cm {
        spinlock_t lock;
        struct list_head device_list;
        rwlock_t device_lock;
        struct rb_root listen_service_table;
        u64 listen_service_id;
        /* struct rb_root peer_service_table; todo: fix peer to peer */
        struct rb_root remote_qp_table;
        struct rb_root remote_id_table;
        struct rb_root remote_sidr_table;
        struct idr local_id_table;
        __be32 random_id_operand;
        struct list_head timewait_list;
        struct workqueue_struct *wq;
        /* Synchronizes CM port state changes */
        spinlock_t state_lock;
} cm;

/* Counter indexes ordered by attribute ID */
enum {
        CM_REQ_COUNTER,
        CM_MRA_COUNTER,
        CM_REJ_COUNTER,
        CM_REP_COUNTER,
        CM_RTU_COUNTER,
        CM_DREQ_COUNTER,
        CM_DREP_COUNTER,
        CM_SIDR_REQ_COUNTER,
        CM_SIDR_REP_COUNTER,
        CM_LAP_COUNTER,
        CM_APR_COUNTER,
        CM_ATTR_COUNT,
        CM_ATTR_ID_OFFSET = 0x0010,
};

enum {
        CM_XMIT,
        CM_XMIT_RETRIES,
        CM_RECV,
        CM_RECV_DUPLICATES,
        CM_COUNTER_GROUPS
};

static char const counter_group_names[CM_COUNTER_GROUPS]
                                     [sizeof("cm_rx_duplicates")] = {
        "cm_tx_msgs", "cm_tx_retries",
        "cm_rx_msgs", "cm_rx_duplicates"
};

struct cm_counter_group {
        struct kobject obj;
        atomic_long_t counter[CM_ATTR_COUNT];
};

struct cm_counter_attribute {
        struct attribute attr;
        int index;
};

#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
        .attr = { .name = __stringify(_name), .mode = 0444 }, \
        .index = _index \
}

static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);

static struct attribute *cm_counter_default_attrs[] = {
        &cm_req_counter_attr.attr,
        &cm_mra_counter_attr.attr,
        &cm_rej_counter_attr.attr,
        &cm_rep_counter_attr.attr,
        &cm_rtu_counter_attr.attr,
        &cm_dreq_counter_attr.attr,
        &cm_drep_counter_attr.attr,
        &cm_sidr_req_counter_attr.attr,
        &cm_sidr_rep_counter_attr.attr,
        &cm_lap_counter_attr.attr,
        &cm_apr_counter_attr.attr,
        NULL
};

struct cm_port {
        struct cm_device *cm_dev;
        struct ib_mad_agent *mad_agent;
        struct kobject port_obj;
        u8 port_num;
        struct list_head cm_priv_prim_list;
        struct list_head cm_priv_altr_list;
        struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};

struct cm_device {
        struct list_head list;
        struct ib_device *ib_device;
        struct device *device;
        u8 ack_delay;
        int going_down;
        struct cm_port *port[0];
};

struct cm_av {
        struct cm_port *port;
        union ib_gid dgid;
        struct rdma_ah_attr ah_attr;
        u16 pkey_index;
        u8 timeout;
};

struct cm_work {
        struct delayed_work work;
        struct list_head list;
        struct cm_port *port;
        struct ib_mad_recv_wc *mad_recv_wc;     /* Received MADs */
        __be32 local_id;                        /* Established / timewait */
        __be32 remote_id;
        struct ib_cm_event cm_event;
        struct sa_path_rec path[0];
};

struct cm_timewait_info {
        struct cm_work work;                    /* Must be first. */
        struct list_head list;
        struct rb_node remote_qp_node;
        struct rb_node remote_id_node;
        __be64 remote_ca_guid;
        __be32 remote_qpn;
        u8 inserted_remote_qp;
        u8 inserted_remote_id;
};

struct cm_id_private {
        struct ib_cm_id id;

        struct rb_node service_node;
        struct rb_node sidr_id_node;
        spinlock_t lock;        /* Do not acquire inside cm.lock */
        struct completion comp;
        atomic_t refcount;
        /* Number of clients sharing this ib_cm_id. Only valid for listeners.
         * Protected by the cm.lock spinlock. */
        int listen_sharecount;

        struct ib_mad_send_buf *msg;
        struct cm_timewait_info *timewait_info;
        /* todo: use alternate port on send failure */
        struct cm_av av;
        struct cm_av alt_av;

        void *private_data;
        __be64 tid;
        __be32 local_qpn;
        __be32 remote_qpn;
        enum ib_qp_type qp_type;
        __be32 sq_psn;
        __be32 rq_psn;
        int timeout_ms;
        enum ib_mtu path_mtu;
        __be16 pkey;
        u8 private_data_len;
        u8 max_cm_retries;
        u8 peer_to_peer;
        u8 responder_resources;
        u8 initiator_depth;
        u8 retry_count;
        u8 rnr_retry_count;
        u8 service_timeout;
        u8 target_ack_delay;

        struct list_head prim_list;
        struct list_head altr_list;
        /* Indicates that the send port mad is registered and av is set */
        int prim_send_port_not_ready;
        int altr_send_port_not_ready;

        struct list_head work_list;
        atomic_t work_count;
};

static void cm_work_handler(struct work_struct *work);

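/*
 * Drop a reference on a cm_id.  When the last reference is dropped, ->comp
 * is completed; cm_destroy_id() waits on that completion before freeing the
 * structure, so a cm_id is never freed while queued work still references it.
 */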
static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
        if (atomic_dec_and_test(&cm_id_priv->refcount))
                complete(&cm_id_priv->comp);
}

static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
                        struct ib_mad_send_buf **msg)
{
        struct ib_mad_agent *mad_agent;
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;
        struct cm_av *av;
        unsigned long flags, flags2;
        int ret = 0;

        /* don't let the port be released until we're done with the MAD agent */
        spin_lock_irqsave(&cm.state_lock, flags2);
        spin_lock_irqsave(&cm.lock, flags);
        if (!cm_id_priv->prim_send_port_not_ready)
                av = &cm_id_priv->av;
        else if (!cm_id_priv->altr_send_port_not_ready &&
                 (cm_id_priv->alt_av.port))
                av = &cm_id_priv->alt_av;
        else {
                pr_info("%s: not valid CM id\n", __func__);
                ret = -ENODEV;
                spin_unlock_irqrestore(&cm.lock, flags);
                goto out;
        }
        spin_unlock_irqrestore(&cm.lock, flags);
        /* Make sure the port hasn't released the MAD agent yet */
        mad_agent = cm_id_priv->av.port->mad_agent;
        if (!mad_agent) {
                pr_info("%s: not a valid MAD agent\n", __func__);
                ret = -ENODEV;
                goto out;
        }
        ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr);
        if (IS_ERR(ah)) {
                ret = PTR_ERR(ah);
                goto out;
        }

        m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
                               av->pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                rdma_destroy_ah(ah);
                ret = PTR_ERR(m);
                goto out;
        }

        /* Timeout set by caller if response is expected. */
        m->ah = ah;
        m->retries = cm_id_priv->max_cm_retries;

        atomic_inc(&cm_id_priv->refcount);
        m->context[0] = cm_id_priv;
        *msg = m;

out:
        spin_unlock_irqrestore(&cm.state_lock, flags2);
        return ret;
}

static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
                                                           struct ib_mad_recv_wc *mad_recv_wc)
{
        return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
                                  0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                                  GFP_ATOMIC,
                                  IB_MGMT_BASE_VERSION);
}

static int cm_create_response_msg_ah(struct cm_port *port,
                                     struct ib_mad_recv_wc *mad_recv_wc,
                                     struct ib_mad_send_buf *msg)
{
        struct ib_ah *ah;

        ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
                                  mad_recv_wc->recv_buf.grh, port->port_num);
        if (IS_ERR(ah))
                return PTR_ERR(ah);

        msg->ah = ah;
        return 0;
}

static void cm_free_msg(struct ib_mad_send_buf *msg)
{
        if (msg->ah)
                rdma_destroy_ah(msg->ah);
        if (msg->context[0])
                cm_deref_id(msg->context[0]);
        ib_free_send_mad(msg);
}

static int cm_alloc_response_msg(struct cm_port *port,
                                 struct ib_mad_recv_wc *mad_recv_wc,
                                 struct ib_mad_send_buf **msg)
{
        struct ib_mad_send_buf *m;
        int ret;

        m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
        if (IS_ERR(m))
                return PTR_ERR(m);

        ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
        if (ret) {
                cm_free_msg(m);
                return ret;
        }

        *msg = m;
        return 0;
}

static void * cm_copy_private_data(const void *private_data,
                                   u8 private_data_len)
{
        void *data;

        if (!private_data || !private_data_len)
                return NULL;

        data = kmemdup(private_data, private_data_len, GFP_KERNEL);
        if (!data)
                return ERR_PTR(-ENOMEM);

        return data;
}

static void cm_set_private_data(struct cm_id_private *cm_id_priv,
                                 void *private_data, u8 private_data_len)
{
        if (cm_id_priv->private_data && cm_id_priv->private_data_len)
                kfree(cm_id_priv->private_data);

        cm_id_priv->private_data = private_data;
        cm_id_priv->private_data_len = private_data_len;
}

static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
                              struct ib_grh *grh, struct cm_av *av)
{
        struct rdma_ah_attr new_ah_attr;
        int ret;

        av->port = port;
        av->pkey_index = wc->pkey_index;

        /*
         * av->ah_attr might already be initialized from a past wc, either
         * from an incoming connect request or from sending out a connect
         * request. So initialize a new ah_attr on the stack. If
         * initialization fails, the old ah_attr is still usable for sending
         * any responses. If initialization succeeds, the new ah_attr
         * replaces the old one.
         */
        ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
                                      port->port_num, wc,
                                      grh, &new_ah_attr);
        if (ret)
                return ret;

        rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
        return 0;
}

static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                                   struct ib_grh *grh, struct cm_av *av)
{
        av->port = port;
        av->pkey_index = wc->pkey_index;
        return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
                                       port->port_num, wc,
                                       grh, &av->ah_attr);
}

static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
                                  struct cm_av *av,
                                  struct cm_port *port)
{
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&cm.lock, flags);

        if (&cm_id_priv->av == av)
                list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
        else if (&cm_id_priv->alt_av == av)
                list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
        else
                ret = -EINVAL;

        spin_unlock_irqrestore(&cm.lock, flags);
        return ret;
}

static struct cm_port *
get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
{
        struct cm_device *cm_dev;
        struct cm_port *port = NULL;
        unsigned long flags;

        if (attr) {
                read_lock_irqsave(&cm.device_lock, flags);
                list_for_each_entry(cm_dev, &cm.device_list, list) {
                        if (cm_dev->ib_device == attr->device) {
                                port = cm_dev->port[attr->port_num - 1];
                                break;
                        }
                }
                read_unlock_irqrestore(&cm.device_lock, flags);
        } else {
                /* The SGID attribute can be NULL in the following
                 * conditions:
                 * (a) Alternative path
                 * (b) IB link layer without GRH
                 * (c) LAP send messages
                 */
                read_lock_irqsave(&cm.device_lock, flags);
                list_for_each_entry(cm_dev, &cm.device_list, list) {
                        attr = rdma_find_gid(cm_dev->ib_device,
                                             &path->sgid,
                                             sa_conv_pathrec_to_gid_type(path),
                                             NULL);
                        if (!IS_ERR(attr)) {
                                port = cm_dev->port[attr->port_num - 1];
                                break;
                        }
                }
                read_unlock_irqrestore(&cm.device_lock, flags);
                if (port)
                        rdma_put_gid_attr(attr);
        }
        return port;
}

static int cm_init_av_by_path(struct sa_path_rec *path,
                              const struct ib_gid_attr *sgid_attr,
                              struct cm_av *av,
                              struct cm_id_private *cm_id_priv)
{
        struct rdma_ah_attr new_ah_attr;
        struct cm_device *cm_dev;
        struct cm_port *port;
        int ret;

        port = get_cm_port_from_path(path, sgid_attr);
        if (!port)
                return -EINVAL;
        cm_dev = port->cm_dev;

        ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
                                  be16_to_cpu(path->pkey), &av->pkey_index);
        if (ret)
                return ret;

        av->port = port;

        /*
         * av->ah_attr might already be initialized from a wc or from earlier
         * request processing, and may hold a reference to sgid_attr. So
         * initialize a new ah_attr on the stack. If initialization fails,
         * the old ah_attr remains valid for sending any responses; if it
         * succeeds, the new ah_attr replaces the old one. Either way, a
         * usable ah_attr is available to return an error response.
         */
        ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
                                        &new_ah_attr, sgid_attr);
        if (ret)
                return ret;

        av->timeout = path->packet_life_time + 1;

        ret = add_cm_id_to_port_list(cm_id_priv, av, port);
        if (ret) {
                rdma_destroy_ah_attr(&new_ah_attr);
                return ret;
        }
        rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
        return 0;
}

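/*
 * Local communication IDs are IDR indices XORed with cm.random_id_operand
 * (a random value, presumably chosen at module init) so that the IDs which
 * appear on the wire are not trivially guessable; cm_free_id() and
 * cm_get_id() apply the same XOR to map back to the IDR index.
 */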
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;
        int id;

        idr_preload(GFP_KERNEL);
        spin_lock_irqsave(&cm.lock, flags);

        id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);

        spin_unlock_irqrestore(&cm.lock, flags);
        idr_preload_end();

        cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
        return id < 0 ? id : 0;
}

static void cm_free_id(__be32 local_id)
{
        spin_lock_irq(&cm.lock);
        idr_remove(&cm.local_id_table,
                   (__force int) (local_id ^ cm.random_id_operand));
        spin_unlock_irq(&cm.lock);
}

static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        cm_id_priv = idr_find(&cm.local_id_table,
                              (__force int) (local_id ^ cm.random_id_operand));
        if (cm_id_priv) {
                if (cm_id_priv->id.remote_id == remote_id)
                        atomic_inc(&cm_id_priv->refcount);
                else
                        cm_id_priv = NULL;
        }

        return cm_id_priv;
}

static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        spin_lock_irq(&cm.lock);
        cm_id_priv = cm_get_id(local_id, remote_id);
        spin_unlock_irq(&cm.lock);

        return cm_id_priv;
}

/*
 * Trivial helpers to strip endian annotation and compare; the
 * endianness doesn't actually matter since we just need a stable
 * order for the RB tree.
 */
static int be32_lt(__be32 a, __be32 b)
{
        return (__force u32) a < (__force u32) b;
}

static int be32_gt(__be32 a, __be32 b)
{
        return (__force u32) a > (__force u32) b;
}

static int be64_lt(__be64 a, __be64 b)
{
        return (__force u64) a < (__force u64) b;
}

static int be64_gt(__be64 a, __be64 b)
{
        return (__force u64) a > (__force u64) b;
}

static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
{
        struct rb_node **link = &cm.listen_service_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        __be64 service_id = cm_id_priv->id.service_id;
        __be64 service_mask = cm_id_priv->id.service_mask;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          service_node);
                if ((cur_cm_id_priv->id.service_mask & service_id) ==
                    (service_mask & cur_cm_id_priv->id.service_id) &&
                    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
                        return cur_cm_id_priv;

                if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
                        link = &(*link)->rb_left;
                else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
                        link = &(*link)->rb_right;
                else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_left;
                else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_right;
                else
                        link = &(*link)->rb_right;
        }
        rb_link_node(&cm_id_priv->service_node, parent, link);
        rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
        return NULL;
}

static struct cm_id_private * cm_find_listen(struct ib_device *device,
                                             __be64 service_id)
{
        struct rb_node *node = cm.listen_service_table.rb_node;
        struct cm_id_private *cm_id_priv;

        while (node) {
                cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
                if ((cm_id_priv->id.service_mask & service_id) ==
                     cm_id_priv->id.service_id &&
                    (cm_id_priv->id.device == device))
                        return cm_id_priv;

                if (device < cm_id_priv->id.device)
                        node = node->rb_left;
                else if (device > cm_id_priv->id.device)
                        node = node->rb_right;
                else if (be64_lt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_left;
                else if (be64_gt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_right;
                else
                        node = node->rb_right;
        }
        return NULL;
}

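/*
 * The timewait trees below are keyed first on the remote communication ID
 * (or remote QPN) and then on the remote CA GUID, mirroring the lookup
 * order in cm_find_remote_id().
 */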
static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
                                                     *timewait_info)
{
        struct rb_node **link = &cm.remote_id_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_id = timewait_info->work.remote_id;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_id_node);
                if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_id = 1;
        rb_link_node(&timewait_info->remote_id_node, parent, link);
        rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
        return NULL;
}

static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
                                                   __be32 remote_id)
{
        struct rb_node *node = cm.remote_id_table.rb_node;
        struct cm_timewait_info *timewait_info;

        while (node) {
                timewait_info = rb_entry(node, struct cm_timewait_info,
                                         remote_id_node);
                if (be32_lt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_left;
                else if (be32_gt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_right;
                else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_left;
                else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_right;
                else
                        return timewait_info;
        }
        return NULL;
}

static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
                                                      *timewait_info)
{
        struct rb_node **link = &cm.remote_qp_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_qpn = timewait_info->remote_qpn;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_qp_node);
                if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_qp = 1;
        rb_link_node(&timewait_info->remote_qp_node, parent, link);
        rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
        return NULL;
}

static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
                                                    *cm_id_priv)
{
        struct rb_node **link = &cm.remote_sidr_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        union ib_gid *port_gid = &cm_id_priv->av.dgid;
        __be32 remote_id = cm_id_priv->id.remote_id;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          sidr_id_node);
                if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_right;
                else {
                        int cmp;
                        cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
                                     sizeof *port_gid);
                        if (cmp < 0)
                                link = &(*link)->rb_left;
                        else if (cmp > 0)
                                link = &(*link)->rb_right;
                        else
                                return cur_cm_id_priv;
                }
        }
        rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
        rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
        return NULL;
}

static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
                               enum ib_cm_sidr_status status)
{
        struct ib_cm_sidr_rep_param param;

        memset(&param, 0, sizeof param);
        param.status = status;
        ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}

struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
                                 ib_cm_handler cm_handler,
                                 void *context)
{
        struct cm_id_private *cm_id_priv;
        int ret;

        cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
        if (!cm_id_priv)
                return ERR_PTR(-ENOMEM);

        cm_id_priv->id.state = IB_CM_IDLE;
        cm_id_priv->id.device = device;
        cm_id_priv->id.cm_handler = cm_handler;
        cm_id_priv->id.context = context;
        cm_id_priv->id.remote_cm_qpn = 1;
        ret = cm_alloc_id(cm_id_priv);
        if (ret)
                goto error;

        spin_lock_init(&cm_id_priv->lock);
        init_completion(&cm_id_priv->comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
        INIT_LIST_HEAD(&cm_id_priv->prim_list);
        INIT_LIST_HEAD(&cm_id_priv->altr_list);
        atomic_set(&cm_id_priv->work_count, -1);
        atomic_set(&cm_id_priv->refcount, 1);
        return &cm_id_priv->id;

error:
        kfree(cm_id_priv);
        return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_create_cm_id);
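
/*
 * Minimal creation sketch (hypothetical names, error handling elided;
 * my_cm_handler and my_ctx are placeholders, not part of this file):
 *
 *	cm_id = ib_create_cm_id(device, my_cm_handler, my_ctx);
 *	if (IS_ERR(cm_id))
 *		return PTR_ERR(cm_id);
 *	...
 *	ib_destroy_cm_id(cm_id);
 */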

static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
        struct cm_work *work;

        if (list_empty(&cm_id_priv->work_list))
                return NULL;

        work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
        list_del(&work->list);
        return work;
}

static void cm_free_work(struct cm_work *work)
{
        if (work->mad_recv_wc)
                ib_free_recv_mad(work->mad_recv_wc);
        kfree(work);
}

static inline int cm_convert_to_ms(int iba_time)
{
        /* approximate conversion to ms from 4.096us x 2^iba_time */
        return 1 << max(iba_time - 8, 0);
}
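/*
 * e.g. iba_time = 14: the exact value is 4.096us * 2^14 ~= 67ms, while the
 * approximation above yields 1 << (14 - 8) = 64ms.
 */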

/*
 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
 * Because of how ack_timeout is stored, adding one doubles the timeout.
 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
 * increment it (round up) only if the other is within 50%.
 */
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
        int ack_timeout = packet_life_time + 1;

        if (ack_timeout >= ca_ack_delay)
                ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
        else
                ack_timeout = ca_ack_delay +
                              (ack_timeout >= (ca_ack_delay - 1));

        return min(31, ack_timeout);
}
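/*
 * e.g. ca_ack_delay = 16, packet_life_time = 16: ack_timeout starts at 17;
 * since ca_ack_delay (16) >= ack_timeout - 1, it is rounded up to 18.  The
 * result is clamped to 31, the largest value a 5-bit timeout field can hold.
 */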

static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
{
        if (timewait_info->inserted_remote_id) {
                rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
                timewait_info->inserted_remote_id = 0;
        }

        if (timewait_info->inserted_remote_qp) {
                rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
                timewait_info->inserted_remote_qp = 0;
        }
}

static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
{
        struct cm_timewait_info *timewait_info;

        timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
        if (!timewait_info)
                return ERR_PTR(-ENOMEM);

        timewait_info->work.local_id = local_id;
        INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
        timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
        return timewait_info;
}

static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
        int wait_time;
        unsigned long flags;
        struct cm_device *cm_dev;

        cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
        if (!cm_dev)
                return;

        spin_lock_irqsave(&cm.lock, flags);
        cm_cleanup_timewait(cm_id_priv->timewait_info);
        list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
        spin_unlock_irqrestore(&cm.lock, flags);

        /*
         * The cm_id could be destroyed by the user before we exit timewait.
         * To protect against this, we search for the cm_id after exiting
         * timewait before notifying the user that we've exited timewait.
         */
        cm_id_priv->id.state = IB_CM_TIMEWAIT;
        wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);

        /* Check if the device started its remove_one */
        spin_lock_irqsave(&cm.lock, flags);
        if (!cm_dev->going_down)
                queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
                                   msecs_to_jiffies(wait_time));
        spin_unlock_irqrestore(&cm.lock, flags);

        cm_id_priv->timewait_info = NULL;
}

static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;

        cm_id_priv->id.state = IB_CM_IDLE;
        if (cm_id_priv->timewait_info) {
                spin_lock_irqsave(&cm.lock, flags);
                cm_cleanup_timewait(cm_id_priv->timewait_info);
                spin_unlock_irqrestore(&cm.lock, flags);
                kfree(cm_id_priv->timewait_info);
                cm_id_priv->timewait_info = NULL;
        }
}

static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
        struct cm_id_private *cm_id_priv;
        struct cm_work *work;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
        spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id->state) {
        case IB_CM_LISTEN:
                spin_unlock_irq(&cm_id_priv->lock);

                spin_lock_irq(&cm.lock);
                if (--cm_id_priv->listen_sharecount > 0) {
                        /* The id is still shared. */
                        cm_deref_id(cm_id_priv);
                        spin_unlock_irq(&cm.lock);
                        return;
                }
                rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_SIDR_REQ_SENT:
                cm_id->state = IB_CM_IDLE;
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_SIDR_REQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
                spin_lock_irq(&cm.lock);
                if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
                        rb_erase(&cm_id_priv->sidr_id_node,
                                 &cm.remote_sidr_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
                               &cm_id_priv->id.device->node_guid,
                               sizeof cm_id_priv->id.device->node_guid,
                               NULL, 0);
                break;
        case IB_CM_REQ_RCVD:
                if (err == -ENOMEM) {
                        /* Do not reject to allow future retries. */
                        cm_reset_to_idle(cm_id_priv);
                        spin_unlock_irq(&cm_id_priv->lock);
                } else {
                        spin_unlock_irq(&cm_id_priv->lock);
                        ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                                       NULL, 0, NULL, 0);
                }
                break;
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                /* Fall through */
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                               NULL, 0, NULL, 0);
                break;
        case IB_CM_ESTABLISHED:
                spin_unlock_irq(&cm_id_priv->lock);
                if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
                        break;
                ib_send_cm_dreq(cm_id, NULL, 0);
                goto retest;
        case IB_CM_DREQ_SENT:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                cm_enter_timewait(cm_id_priv);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_DREQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_drep(cm_id, NULL, 0);
                break;
        default:
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        }

        spin_lock_irq(&cm.lock);
        if (!list_empty(&cm_id_priv->altr_list) &&
            (!cm_id_priv->altr_send_port_not_ready))
                list_del(&cm_id_priv->altr_list);
        if (!list_empty(&cm_id_priv->prim_list) &&
            (!cm_id_priv->prim_send_port_not_ready))
                list_del(&cm_id_priv->prim_list);
        spin_unlock_irq(&cm.lock);

        cm_free_id(cm_id->local_id);
        cm_deref_id(cm_id_priv);
        wait_for_completion(&cm_id_priv->comp);
        while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
                cm_free_work(work);

        rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
        rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
        kfree(cm_id_priv->private_data);
        kfree(cm_id_priv);
}

void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
        cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

/**
 * __ib_cm_listen - Initiates listening on the specified service ID for
 *   connection and service ID resolution requests.
 * @cm_id: Connection identifier associated with the listen request.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 * @service_mask: Mask applied to service ID used to listen across a
 *   range of service IDs.  If set to 0, the service ID is matched
 *   exactly.  This parameter is ignored if %service_id is set to
 *   IB_CM_ASSIGN_SERVICE_ID.
 */
static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
                          __be64 service_mask)
{
        struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
        int ret = 0;

        service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
        service_id &= service_mask;
        if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
            (service_id != IB_CM_ASSIGN_SERVICE_ID))
                return -EINVAL;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        if (cm_id->state != IB_CM_IDLE)
                return -EINVAL;

        cm_id->state = IB_CM_LISTEN;
        ++cm_id_priv->listen_sharecount;

        if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
                cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
                cm_id->service_mask = ~cpu_to_be64(0);
        } else {
                cm_id->service_id = service_id;
                cm_id->service_mask = service_mask;
        }
        cur_cm_id_priv = cm_insert_listen(cm_id_priv);

        if (cur_cm_id_priv) {
                cm_id->state = IB_CM_IDLE;
                --cm_id_priv->listen_sharecount;
                ret = -EBUSY;
        }
        return ret;
}

int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm.lock, flags);
        ret = __ib_cm_listen(cm_id, service_id, service_mask);
        spin_unlock_irqrestore(&cm.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_cm_listen);

/**
 * Create a new listening ib_cm_id and listen on the given service ID.
 *
 * If there's an existing ID listening on that same device and service ID,
 * return it.
 *
 * @device: Device associated with the cm_id.  All related communication will
 * be associated with the specified device.
 * @cm_handler: Callback invoked to notify the user of CM events.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 *
 * Callers should call ib_destroy_cm_id when done with the listener ID.
 */
struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
                                     ib_cm_handler cm_handler,
                                     __be64 service_id)
{
        struct cm_id_private *cm_id_priv;
        struct ib_cm_id *cm_id;
        unsigned long flags;
        int err = 0;

        /* Create an ID in advance, since the creation may sleep */
        cm_id = ib_create_cm_id(device, cm_handler, NULL);
        if (IS_ERR(cm_id))
                return cm_id;

        spin_lock_irqsave(&cm.lock, flags);

        if (service_id == IB_CM_ASSIGN_SERVICE_ID)
                goto new_id;

        /* Find an existing ID */
        cm_id_priv = cm_find_listen(device, service_id);
        if (cm_id_priv) {
                if (cm_id->cm_handler != cm_handler || cm_id->context) {
                        /* Sharing an ib_cm_id with different handlers is not
                         * supported */
                        spin_unlock_irqrestore(&cm.lock, flags);
                        return ERR_PTR(-EINVAL);
                }
                atomic_inc(&cm_id_priv->refcount);
                ++cm_id_priv->listen_sharecount;
                spin_unlock_irqrestore(&cm.lock, flags);

                ib_destroy_cm_id(cm_id);
                cm_id = &cm_id_priv->id;
                return cm_id;
        }

new_id:
        /* Use newly created ID */
        err = __ib_cm_listen(cm_id, service_id, 0);

        spin_unlock_irqrestore(&cm.lock, flags);

        if (err) {
                ib_destroy_cm_id(cm_id);
                return ERR_PTR(err);
        }
        return cm_id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);

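/*
 * A CM transaction ID carries the MAD agent's hi_tid in its upper 32 bits
 * and the local communication ID in the lower 32 bits, so a reply can be
 * demultiplexed back to the owning agent and cm_id.
 */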
1259 static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
1260 {
1261         u64 hi_tid, low_tid;
1262
1263         hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
1264         low_tid  = (u64)cm_id_priv->id.local_id;
1265         return cpu_to_be64(hi_tid | low_tid);
1266 }
1267
1268 static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
1269                               __be16 attr_id, __be64 tid)
1270 {
1271         hdr->base_version  = IB_MGMT_BASE_VERSION;
1272         hdr->mgmt_class    = IB_MGMT_CLASS_CM;
1273         hdr->class_version = IB_CM_CLASS_VERSION;
1274         hdr->method        = IB_MGMT_METHOD_SEND;
1275         hdr->attr_id       = attr_id;
1276         hdr->tid           = tid;
1277 }
1278
1279 static void cm_format_req(struct cm_req_msg *req_msg,
1280                           struct cm_id_private *cm_id_priv,
1281                           struct ib_cm_req_param *param)
1282 {
1283         struct sa_path_rec *pri_path = param->primary_path;
1284         struct sa_path_rec *alt_path = param->alternate_path;
1285         bool pri_ext = false;
1286
1287         if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
1288                 pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
1289                                               pri_path->opa.slid);
1290
1291         cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
1292                           cm_form_tid(cm_id_priv));
1293
1294         req_msg->local_comm_id = cm_id_priv->id.local_id;
1295         req_msg->service_id = param->service_id;
1296         req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1297         cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
1298         cm_req_set_init_depth(req_msg, param->initiator_depth);
1299         cm_req_set_remote_resp_timeout(req_msg,
1300                                        param->remote_cm_response_timeout);
1301         cm_req_set_qp_type(req_msg, param->qp_type);
1302         cm_req_set_flow_ctrl(req_msg, param->flow_control);
1303         cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
1304         cm_req_set_local_resp_timeout(req_msg,
1305                                       param->local_cm_response_timeout);
1306         req_msg->pkey = param->primary_path->pkey;
1307         cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
1308         cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
1309
1310         if (param->qp_type != IB_QPT_XRC_INI) {
1311                 cm_req_set_resp_res(req_msg, param->responder_resources);
1312                 cm_req_set_retry_count(req_msg, param->retry_count);
1313                 cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
1314                 cm_req_set_srq(req_msg, param->srq);
1315         }
1316
1317         req_msg->primary_local_gid = pri_path->sgid;
1318         req_msg->primary_remote_gid = pri_path->dgid;
1319         if (pri_ext) {
1320                 req_msg->primary_local_gid.global.interface_id
1321                         = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
1322                 req_msg->primary_remote_gid.global.interface_id
1323                         = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
1324         }
1325         if (pri_path->hop_limit <= 1) {
1326                 req_msg->primary_local_lid = pri_ext ? 0 :
1327                         htons(ntohl(sa_path_get_slid(pri_path)));
1328                 req_msg->primary_remote_lid = pri_ext ? 0 :
1329                         htons(ntohl(sa_path_get_dlid(pri_path)));
1330         } else {
1331                 /* Work-around until there's a way to obtain remote LID info */
1332                 req_msg->primary_local_lid = IB_LID_PERMISSIVE;
1333                 req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
1334         }
1335         cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
1336         cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
1337         req_msg->primary_traffic_class = pri_path->traffic_class;
1338         req_msg->primary_hop_limit = pri_path->hop_limit;
1339         cm_req_set_primary_sl(req_msg, pri_path->sl);
1340         cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
1341         cm_req_set_primary_local_ack_timeout(req_msg,
1342                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1343                                pri_path->packet_life_time));
1344
1345         if (alt_path) {
1346                 bool alt_ext = false;
1347
1348                 if (alt_path->rec_type == SA_PATH_REC_TYPE_OPA)
1349                         alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
1350                                                       alt_path->opa.slid);
1351
1352                 req_msg->alt_local_gid = alt_path->sgid;
1353                 req_msg->alt_remote_gid = alt_path->dgid;
1354                 if (alt_ext) {
1355                         req_msg->alt_local_gid.global.interface_id
1356                                 = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
1357                         req_msg->alt_remote_gid.global.interface_id
1358                                 = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
1359                 }
1360                 if (alt_path->hop_limit <= 1) {
1361                         req_msg->alt_local_lid = alt_ext ? 0 :
1362                                 htons(ntohl(sa_path_get_slid(alt_path)));
1363                         req_msg->alt_remote_lid = alt_ext ? 0 :
1364                                 htons(ntohl(sa_path_get_dlid(alt_path)));
1365                 } else {
1366                         req_msg->alt_local_lid = IB_LID_PERMISSIVE;
1367                         req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
1368                 }
1369                 cm_req_set_alt_flow_label(req_msg,
1370                                           alt_path->flow_label);
1371                 cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
1372                 req_msg->alt_traffic_class = alt_path->traffic_class;
1373                 req_msg->alt_hop_limit = alt_path->hop_limit;
1374                 cm_req_set_alt_sl(req_msg, alt_path->sl);
1375                 cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
1376                 cm_req_set_alt_local_ack_timeout(req_msg,
1377                         cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1378                                        alt_path->packet_life_time));
1379         }
1380
1381         if (param->private_data && param->private_data_len)
1382                 memcpy(req_msg->private_data, param->private_data,
1383                        param->private_data_len);
1384 }
1385
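/*
 * Sanity-check a REQ before it is built: peer-to-peer is not supported,
 * a primary path is mandatory, only RC, UC and XRC-initiator QPs can be
 * connected, private data must fit in the REQ, and any alternate path
 * must agree with the primary path on pkey and MTU.
 */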
1386 static int cm_validate_req_param(struct ib_cm_req_param *param)
1387 {
1388         /* peer-to-peer not supported */
1389         if (param->peer_to_peer)
1390                 return -EINVAL;
1391
1392         if (!param->primary_path)
1393                 return -EINVAL;
1394
1395         if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
1396             param->qp_type != IB_QPT_XRC_INI)
1397                 return -EINVAL;
1398
1399         if (param->private_data &&
1400             param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
1401                 return -EINVAL;
1402
1403         if (param->alternate_path &&
1404             (param->alternate_path->pkey != param->primary_path->pkey ||
1405              param->alternate_path->mtu != param->primary_path->mtu))
1406                 return -EINVAL;
1407
1408         return 0;
1409 }
1410
1411 int ib_send_cm_req(struct ib_cm_id *cm_id,
1412                    struct ib_cm_req_param *param)
1413 {
1414         struct cm_id_private *cm_id_priv;
1415         struct cm_req_msg *req_msg;
1416         unsigned long flags;
1417         int ret;
1418
1419         ret = cm_validate_req_param(param);
1420         if (ret)
1421                 return ret;
1422
1423         /* Verify that we're not in timewait; the cm_id must be idle. */
1424         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1425         spin_lock_irqsave(&cm_id_priv->lock, flags);
1426         if (cm_id->state != IB_CM_IDLE) {
1427                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1428                 ret = -EINVAL;
1429                 goto out;
1430         }
1431         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1432
1433         cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1434                                                             id.local_id);
1435         if (IS_ERR(cm_id_priv->timewait_info)) {
1436                 ret = PTR_ERR(cm_id_priv->timewait_info);
1437                 goto out;
1438         }
1439
1440         ret = cm_init_av_by_path(param->primary_path,
1441                                  param->ppath_sgid_attr, &cm_id_priv->av,
1442                                  cm_id_priv);
1443         if (ret)
1444                 goto error1;
1445         if (param->alternate_path) {
1446                 ret = cm_init_av_by_path(param->alternate_path, NULL,
1447                                          &cm_id_priv->alt_av, cm_id_priv);
1448                 if (ret)
1449                         goto error1;
1450         }
1451         cm_id->service_id = param->service_id;
1452         cm_id->service_mask = ~cpu_to_be64(0);
1453         cm_id_priv->timeout_ms = cm_convert_to_ms(
1454                                     param->primary_path->packet_life_time) * 2 +
1455                                  cm_convert_to_ms(
1456                                     param->remote_cm_response_timeout);
1457         cm_id_priv->max_cm_retries = param->max_cm_retries;
1458         cm_id_priv->initiator_depth = param->initiator_depth;
1459         cm_id_priv->responder_resources = param->responder_resources;
1460         cm_id_priv->retry_count = param->retry_count;
1461         cm_id_priv->path_mtu = param->primary_path->mtu;
1462         cm_id_priv->pkey = param->primary_path->pkey;
1463         cm_id_priv->qp_type = param->qp_type;
1464
1465         ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
1466         if (ret)
1467                 goto error1;
1468
1469         req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
1470         cm_format_req(req_msg, cm_id_priv, param);
1471         cm_id_priv->tid = req_msg->hdr.tid;
1472         cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
1473         cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
1474
1475         cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
1476         cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
1477
1478         spin_lock_irqsave(&cm_id_priv->lock, flags);
1479         ret = ib_post_send_mad(cm_id_priv->msg, NULL);
1480         if (ret) {
1481                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1482                 goto error2;
1483         }
1484         BUG_ON(cm_id->state != IB_CM_IDLE);
1485         cm_id->state = IB_CM_REQ_SENT;
1486         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1487         return 0;
1488
1489 error2: cm_free_msg(cm_id_priv->msg);
1490 error1: kfree(cm_id_priv->timewait_info);
1491 out:    return ret;
1492 }
1493 EXPORT_SYMBOL(ib_send_cm_req);
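/*
 * Illustrative sketch (not part of this file): an active-side ULP would
 * typically fill an ib_cm_req_param from a resolved path record and its
 * QP before posting the REQ.  All names and values below are
 * hypothetical:
 *
 *	struct ib_cm_req_param param = {};
 *
 *	param.primary_path		 = &path_rec;	(e.g. from an SA query)
 *	param.service_id		 = cpu_to_be64(MY_SERVICE_ID);
 *	param.qp_num			 = qp->qp_num;
 *	param.qp_type			 = qp->qp_type;
 *	param.starting_psn		 = prandom_u32() & 0xffffff;
 *	param.responder_resources	 = 4;
 *	param.initiator_depth		 = 4;
 *	param.remote_cm_response_timeout = 20;
 *	param.local_cm_response_timeout	 = 20;
 *	param.retry_count		 = 7;
 *	param.rnr_retry_count		 = 7;
 *	param.max_cm_retries		 = 15;
 *	ret = ib_send_cm_req(cm_id, &param);
 */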
1494
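/*
 * Build and post a REJ directly from a received MAD for cases where no
 * cm_id owns the reply (unmatched or stale messages); only the common
 * CM header of the incoming message is consulted.
 */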
1495 static int cm_issue_rej(struct cm_port *port,
1496                         struct ib_mad_recv_wc *mad_recv_wc,
1497                         enum ib_cm_rej_reason reason,
1498                         enum cm_msg_response msg_rejected,
1499                         void *ari, u8 ari_length)
1500 {
1501         struct ib_mad_send_buf *msg = NULL;
1502         struct cm_rej_msg *rej_msg, *rcv_msg;
1503         int ret;
1504
1505         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
1506         if (ret)
1507                 return ret;
1508
1509         /* We just need common CM header information.  Cast to any message. */
1510         rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
1511         rej_msg = (struct cm_rej_msg *) msg->mad;
1512
1513         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
1514         rej_msg->remote_comm_id = rcv_msg->local_comm_id;
1515         rej_msg->local_comm_id = rcv_msg->remote_comm_id;
1516         cm_rej_set_msg_rejected(rej_msg, msg_rejected);
1517         rej_msg->reason = cpu_to_be16(reason);
1518
1519         if (ari && ari_length) {
1520                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1521                 memcpy(rej_msg->ari, ari, ari_length);
1522         }
1523
1524         ret = ib_post_send_mad(msg, NULL);
1525         if (ret)
1526                 cm_free_msg(msg);
1527
1528         return ret;
1529 }
1530
1531 static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
1532                                     __be32 local_qpn, __be32 remote_qpn)
1533 {
1534         return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
1535                 ((local_ca_guid == remote_ca_guid) &&
1536                  (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
1537 }
1538
1539 static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
1540 {
1541         return ((req_msg->alt_local_lid) ||
1542                 (ib_is_opa_gid(&req_msg->alt_local_gid)));
1543 }
1544
1545 static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
1546                                  struct sa_path_rec *path, union ib_gid *gid)
1547 {
1548         if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num))
1549                 path->rec_type = SA_PATH_REC_TYPE_OPA;
1550         else
1551                 path->rec_type = SA_PATH_REC_TYPE_IB;
1552 }
1553
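/*
 * Note the deliberate cross-over in the helpers below: path records are
 * built from the passive side's point of view, so the REQ's "local"
 * GID/LID (the active sender) becomes the path's destination, and the
 * REQ's "remote" GID/LID becomes the source.
 */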
1554 static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
1555                                         struct sa_path_rec *primary_path,
1556                                         struct sa_path_rec *alt_path)
1557 {
1558         u32 lid;
1559
1560         if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
1561                 sa_path_set_dlid(primary_path,
1562                                  ntohs(req_msg->primary_local_lid));
1563                 sa_path_set_slid(primary_path,
1564                                  ntohs(req_msg->primary_remote_lid));
1565         } else {
1566                 lid = opa_get_lid_from_gid(&req_msg->primary_local_gid);
1567                 sa_path_set_dlid(primary_path, lid);
1568
1569                 lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid);
1570                 sa_path_set_slid(primary_path, lid);
1571         }
1572
1573         if (!cm_req_has_alt_path(req_msg))
1574                 return;
1575
1576         if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
1577                 sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid));
1578                 sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid));
1579         } else {
1580                 lid = opa_get_lid_from_gid(&req_msg->alt_local_gid);
1581                 sa_path_set_dlid(alt_path, lid);
1582
1583                 lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid);
1584                 sa_path_set_slid(alt_path, lid);
1585         }
1586 }
1587
1588 static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1589                                      struct sa_path_rec *primary_path,
1590                                      struct sa_path_rec *alt_path)
1591 {
1592         primary_path->dgid = req_msg->primary_local_gid;
1593         primary_path->sgid = req_msg->primary_remote_gid;
1594         primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
1595         primary_path->hop_limit = req_msg->primary_hop_limit;
1596         primary_path->traffic_class = req_msg->primary_traffic_class;
1597         primary_path->reversible = 1;
1598         primary_path->pkey = req_msg->pkey;
1599         primary_path->sl = cm_req_get_primary_sl(req_msg);
1600         primary_path->mtu_selector = IB_SA_EQ;
1601         primary_path->mtu = cm_req_get_path_mtu(req_msg);
1602         primary_path->rate_selector = IB_SA_EQ;
1603         primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
1604         primary_path->packet_life_time_selector = IB_SA_EQ;
1605         primary_path->packet_life_time =
1606                 cm_req_get_primary_local_ack_timeout(req_msg);
1607         primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1608         primary_path->service_id = req_msg->service_id;
1609         if (sa_path_is_roce(primary_path))
1610                 primary_path->roce.route_resolved = false;
1611
1612         if (cm_req_has_alt_path(req_msg)) {
1613                 alt_path->dgid = req_msg->alt_local_gid;
1614                 alt_path->sgid = req_msg->alt_remote_gid;
1615                 alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
1616                 alt_path->hop_limit = req_msg->alt_hop_limit;
1617                 alt_path->traffic_class = req_msg->alt_traffic_class;
1618                 alt_path->reversible = 1;
1619                 alt_path->pkey = req_msg->pkey;
1620                 alt_path->sl = cm_req_get_alt_sl(req_msg);
1621                 alt_path->mtu_selector = IB_SA_EQ;
1622                 alt_path->mtu = cm_req_get_path_mtu(req_msg);
1623                 alt_path->rate_selector = IB_SA_EQ;
1624                 alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
1625                 alt_path->packet_life_time_selector = IB_SA_EQ;
1626                 alt_path->packet_life_time =
1627                         cm_req_get_alt_local_ack_timeout(req_msg);
1628                 alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1629                 alt_path->service_id = req_msg->service_id;
1630
1631                 if (sa_path_is_roce(alt_path))
1632                         alt_path->roce.route_resolved = false;
1633         }
1634         cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
1635 }
1636
1637 static u16 cm_get_bth_pkey(struct cm_work *work)
1638 {
1639         struct ib_device *ib_dev = work->port->cm_dev->ib_device;
1640         u8 port_num = work->port->port_num;
1641         u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
1642         u16 pkey;
1643         int ret;
1644
1645         ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
1646         if (ret) {
1647                 dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
1648                                      port_num, pkey_index, ret);
1649                 return 0;
1650         }
1651
1652         return pkey;
1653 }
1654
1655 /**
1656  * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
1657  * @work: Work completion
1658  * @path: Path record
1659  *
1660  * ULPs (such as IPoIB) do not understand OPA GIDs and will
1661  * reject them as the local_gid will not match the sgid. Therefore,
1662  * change the pathrec's SGID to an IB SGID.
1663  */
1664 static void cm_opa_to_ib_sgid(struct cm_work *work,
1665                               struct sa_path_rec *path)
1666 {
1667         struct ib_device *dev = work->port->cm_dev->ib_device;
1668         u8 port_num = work->port->port_num;
1669
1670         if (rdma_cap_opa_ah(dev, port_num) &&
1671             (ib_is_opa_gid(&path->sgid))) {
1672                 union ib_gid sgid;
1673
1674                 if (rdma_query_gid(dev, port_num, 0, &sgid)) {
1675                         dev_warn(&dev->dev,
1676                                  "Error updating sgid in CM request\n");
1677                         return;
1678                 }
1679
1680                 path->sgid = sgid;
1681         }
1682 }
1683
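/*
 * Translate a received REQ into the ib_cm_req_event_param handed to the
 * listener.  The resource fields cross over as well: the sender's
 * initiator depth becomes our responder_resources, and vice versa.
 */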
1684 static void cm_format_req_event(struct cm_work *work,
1685                                 struct cm_id_private *cm_id_priv,
1686                                 struct ib_cm_id *listen_id)
1687 {
1688         struct cm_req_msg *req_msg;
1689         struct ib_cm_req_event_param *param;
1690
1691         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1692         param = &work->cm_event.param.req_rcvd;
1693         param->listen_id = listen_id;
1694         param->bth_pkey = cm_get_bth_pkey(work);
1695         param->port = cm_id_priv->av.port->port_num;
1696         param->primary_path = &work->path[0];
1697         cm_opa_to_ib_sgid(work, param->primary_path);
1698         if (cm_req_has_alt_path(req_msg)) {
1699                 param->alternate_path = &work->path[1];
1700                 cm_opa_to_ib_sgid(work, param->alternate_path);
1701         } else {
1702                 param->alternate_path = NULL;
1703         }
1704         param->remote_ca_guid = req_msg->local_ca_guid;
1705         param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
1706         param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
1707         param->qp_type = cm_req_get_qp_type(req_msg);
1708         param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
1709         param->responder_resources = cm_req_get_init_depth(req_msg);
1710         param->initiator_depth = cm_req_get_resp_res(req_msg);
1711         param->local_cm_response_timeout =
1712                                         cm_req_get_remote_resp_timeout(req_msg);
1713         param->flow_control = cm_req_get_flow_ctrl(req_msg);
1714         param->remote_cm_response_timeout =
1715                                         cm_req_get_local_resp_timeout(req_msg);
1716         param->retry_count = cm_req_get_retry_count(req_msg);
1717         param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1718         param->srq = cm_req_get_srq(req_msg);
1719         param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
1720         work->cm_event.private_data = &req_msg->private_data;
1721 }
1722
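/*
 * Deliver the current event to the ULP's cm_handler, then drain any
 * events that were queued on the cm_id in the meantime.  A non-zero
 * return from the handler destroys the cm_id.
 */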
1723 static void cm_process_work(struct cm_id_private *cm_id_priv,
1724                             struct cm_work *work)
1725 {
1726         int ret;
1727
1728         /* We will typically only have the current event to report. */
1729         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1730         cm_free_work(work);
1731
1732         while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1733                 spin_lock_irq(&cm_id_priv->lock);
1734                 work = cm_dequeue_work(cm_id_priv);
1735                 spin_unlock_irq(&cm_id_priv->lock);
1736                 if (!work)
1737                         return;
1738
1739                 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1740                                                 &work->cm_event);
1741                 cm_free_work(work);
1742         }
1743         cm_deref_id(cm_id_priv);
1744         if (ret)
1745                 cm_destroy_id(&cm_id_priv->id, ret);
1746 }
1747
1748 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1749                           struct cm_id_private *cm_id_priv,
1750                           enum cm_msg_response msg_mraed, u8 service_timeout,
1751                           const void *private_data, u8 private_data_len)
1752 {
1753         cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1754         cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1755         mra_msg->local_comm_id = cm_id_priv->id.local_id;
1756         mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1757         cm_mra_set_service_timeout(mra_msg, service_timeout);
1758
1759         if (private_data && private_data_len)
1760                 memcpy(mra_msg->private_data, private_data, private_data_len);
1761 }
1762
1763 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1764                           struct cm_id_private *cm_id_priv,
1765                           enum ib_cm_rej_reason reason,
1766                           void *ari,
1767                           u8 ari_length,
1768                           const void *private_data,
1769                           u8 private_data_len)
1770 {
1771         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1772         rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1773
1774         switch (cm_id_priv->id.state) {
1775         case IB_CM_REQ_RCVD:
1776                 rej_msg->local_comm_id = 0;
1777                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1778                 break;
1779         case IB_CM_MRA_REQ_SENT:
1780                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1781                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1782                 break;
1783         case IB_CM_REP_RCVD:
1784         case IB_CM_MRA_REP_SENT:
1785                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1786                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1787                 break;
1788         default:
1789                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1790                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1791                 break;
1792         }
1793
1794         rej_msg->reason = cpu_to_be16(reason);
1795         if (ari && ari_length) {
1796                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1797                 memcpy(rej_msg->ari, ari, ari_length);
1798         }
1799
1800         if (private_data && private_data_len)
1801                 memcpy(rej_msg->private_data, private_data, private_data_len);
1802 }
1803
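/*
 * A retransmitted REQ for a connection we already know about is
 * answered directly: resend the MRA if one was already sent, reply with
 * a stale-connection REJ if the id is in timewait, and otherwise drop
 * the duplicate silently.
 */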
1804 static void cm_dup_req_handler(struct cm_work *work,
1805                                struct cm_id_private *cm_id_priv)
1806 {
1807         struct ib_mad_send_buf *msg = NULL;
1808         int ret;
1809
1810         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1811                         counter[CM_REQ_COUNTER]);
1812
1813         /* Quick state check to discard duplicate REQs. */
1814         if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1815                 return;
1816
1817         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1818         if (ret)
1819                 return;
1820
1821         spin_lock_irq(&cm_id_priv->lock);
1822         switch (cm_id_priv->id.state) {
1823         case IB_CM_MRA_REQ_SENT:
1824                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1825                               CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1826                               cm_id_priv->private_data,
1827                               cm_id_priv->private_data_len);
1828                 break;
1829         case IB_CM_TIMEWAIT:
1830                 cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1831                               IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1832                 break;
1833         default:
1834                 goto unlock;
1835         }
1836         spin_unlock_irq(&cm_id_priv->lock);
1837
1838         ret = ib_post_send_mad(msg, NULL);
1839         if (ret)
1840                 goto free;
1841         return;
1842
1843 unlock: spin_unlock_irq(&cm_id_priv->lock);
1844 free:   cm_free_msg(msg);
1845 }
1846
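/*
 * Match an incoming REQ against existing state: a remote comm ID we
 * already track indicates a duplicate REQ; a remote QPN still in
 * timewait indicates a stale connection, which is rejected and torn
 * down; otherwise the REQ is matched to a listener by service ID.
 * Returns the listening cm_id with a reference held, or NULL.
 */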
1847 static struct cm_id_private *cm_match_req(struct cm_work *work,
1848                                           struct cm_id_private *cm_id_priv)
1849 {
1850         struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1851         struct cm_timewait_info *timewait_info;
1852         struct cm_req_msg *req_msg;
1853         struct ib_cm_id *cm_id;
1854
1855         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1856
1857         /* Check for possible duplicate REQ. */
1858         spin_lock_irq(&cm.lock);
1859         timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1860         if (timewait_info) {
1861                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1862                                            timewait_info->work.remote_id);
1863                 spin_unlock_irq(&cm.lock);
1864                 if (cur_cm_id_priv) {
1865                         cm_dup_req_handler(work, cur_cm_id_priv);
1866                         cm_deref_id(cur_cm_id_priv);
1867                 }
1868                 return NULL;
1869         }
1870
1871         /* Check for stale connections. */
1872         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1873         if (timewait_info) {
1874                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1875                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1876                                            timewait_info->work.remote_id);
1877
1878                 spin_unlock_irq(&cm.lock);
1879                 cm_issue_rej(work->port, work->mad_recv_wc,
1880                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1881                              NULL, 0);
1882                 if (cur_cm_id_priv) {
1883                         cm_id = &cur_cm_id_priv->id;
1884                         ib_send_cm_dreq(cm_id, NULL, 0);
1885                         cm_deref_id(cur_cm_id_priv);
1886                 }
1887                 return NULL;
1888         }
1889
1890         /* Find matching listen request. */
1891         listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1892                                            req_msg->service_id);
1893         if (!listen_cm_id_priv) {
1894                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1895                 spin_unlock_irq(&cm.lock);
1896                 cm_issue_rej(work->port, work->mad_recv_wc,
1897                              IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1898                              NULL, 0);
1899                 goto out;
1900         }
1901         atomic_inc(&listen_cm_id_priv->refcount);
1902         atomic_inc(&cm_id_priv->refcount);
1903         cm_id_priv->id.state = IB_CM_REQ_RCVD;
1904         atomic_inc(&cm_id_priv->work_count);
1905         spin_unlock_irq(&cm.lock);
1906 out:
1907         return listen_cm_id_priv;
1908 }
1909
1910 /*
1911  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1912  * we need to override the LID/SL data in the REQ with the LID information
1913  * in the work completion.
1914  */
1915 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1916 {
1917         if (!cm_req_get_primary_subnet_local(req_msg)) {
1918                 if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1919                         req_msg->primary_local_lid = ib_lid_be16(wc->slid);
1920                         cm_req_set_primary_sl(req_msg, wc->sl);
1921                 }
1922
1923                 if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1924                         req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1925         }
1926
1927         if (!cm_req_get_alt_subnet_local(req_msg)) {
1928                 if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1929                         req_msg->alt_local_lid = ib_lid_be16(wc->slid);
1930                         cm_req_set_alt_sl(req_msg, wc->sl);
1931                 }
1932
1933                 if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1934                         req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1935         }
1936 }
1937
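/*
 * Passive-side handling of a new REQ: create a cm_id for the incoming
 * connection, reconstruct the primary (and optional alternate) path
 * from the REQ, match it to a listener, and report the request through
 * the listener's callback.
 */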
1938 static int cm_req_handler(struct cm_work *work)
1939 {
1940         struct ib_cm_id *cm_id;
1941         struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1942         struct cm_req_msg *req_msg;
1943         const struct ib_global_route *grh;
1944         const struct ib_gid_attr *gid_attr;
1945         int ret;
1946
1947         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1948
1949         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1950         if (IS_ERR(cm_id))
1951                 return PTR_ERR(cm_id);
1952
1953         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1954         cm_id_priv->id.remote_id = req_msg->local_comm_id;
1955         ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1956                                       work->mad_recv_wc->recv_buf.grh,
1957                                       &cm_id_priv->av);
1958         if (ret)
1959                 goto destroy;
1960         cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1961                                                             id.local_id);
1962         if (IS_ERR(cm_id_priv->timewait_info)) {
1963                 ret = PTR_ERR(cm_id_priv->timewait_info);
1964                 goto destroy;
1965         }
1966         cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1967         cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1968         cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1969
1970         listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1971         if (!listen_cm_id_priv) {
1972                 pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
1973                          be32_to_cpu(cm_id->local_id));
1974                 ret = -EINVAL;
1975                 goto free_timeinfo;
1976         }
1977
1978         cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1979         cm_id_priv->id.context = listen_cm_id_priv->id.context;
1980         cm_id_priv->id.service_id = req_msg->service_id;
1981         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1982
1983         cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1984
1985         memset(&work->path[0], 0, sizeof(work->path[0]));
1986         if (cm_req_has_alt_path(req_msg))
1987                 memset(&work->path[1], 0, sizeof(work->path[1]));
1988         grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
1989         gid_attr = grh->sgid_attr;
1990
1991         if (gid_attr && gid_attr->ndev) {
1992                 work->path[0].rec_type =
1993                         sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
1994         } else {
1995                 /* If no GID attribute or ndev is null, it is not RoCE. */
1996                 cm_path_set_rec_type(work->port->cm_dev->ib_device,
1997                                      work->port->port_num,
1998                                      &work->path[0],
1999                                      &req_msg->primary_local_gid);
2000         }
2001         if (cm_req_has_alt_path(req_msg))
2002                 work->path[1].rec_type = work->path[0].rec_type;
2003         cm_format_paths_from_req(req_msg, &work->path[0],
2004                                  &work->path[1]);
2005         if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
2006                 sa_path_set_dmac(&work->path[0],
2007                                  cm_id_priv->av.ah_attr.roce.dmac);
2008         work->path[0].hop_limit = grh->hop_limit;
2009         ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
2010                                  cm_id_priv);
2011         if (ret) {
2012                 int err;
2013
2014                 err = rdma_query_gid(work->port->cm_dev->ib_device,
2015                                      work->port->port_num, 0,
2016                                      &work->path[0].sgid);
2017                 if (err)
2018                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
2019                                        NULL, 0, NULL, 0);
2020                 else
2021                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
2022                                        &work->path[0].sgid,
2023                                        sizeof(work->path[0].sgid),
2024                                        NULL, 0);
2025                 goto rejected;
2026         }
2027         if (cm_req_has_alt_path(req_msg)) {
2028                 ret = cm_init_av_by_path(&work->path[1], NULL,
2029                                          &cm_id_priv->alt_av, cm_id_priv);
2030                 if (ret) {
2031                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
2032                                        &work->path[0].sgid,
2033                                        sizeof(work->path[0].sgid), NULL, 0);
2034                         goto rejected;
2035                 }
2036         }
2037         cm_id_priv->tid = req_msg->hdr.tid;
2038         cm_id_priv->timeout_ms = cm_convert_to_ms(
2039                                         cm_req_get_local_resp_timeout(req_msg));
2040         cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
2041         cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
2042         cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
2043         cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
2044         cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
2045         cm_id_priv->pkey = req_msg->pkey;
2046         cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
2047         cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
2048         cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
2049         cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
2050
2051         cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
2052         cm_process_work(cm_id_priv, work);
2053         cm_deref_id(listen_cm_id_priv);
2054         return 0;
2055
2056 rejected:
2057         atomic_dec(&cm_id_priv->refcount);
2058         cm_deref_id(listen_cm_id_priv);
2059 free_timeinfo:
2060         kfree(cm_id_priv->timewait_info);
2061 destroy:
2062         ib_destroy_cm_id(cm_id);
2063         return ret;
2064 }
2065
2066 static void cm_format_rep(struct cm_rep_msg *rep_msg,
2067                           struct cm_id_private *cm_id_priv,
2068                           struct ib_cm_rep_param *param)
2069 {
2070         cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
2071         rep_msg->local_comm_id = cm_id_priv->id.local_id;
2072         rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2073         cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
2074         rep_msg->resp_resources = param->responder_resources;
2075         cm_rep_set_target_ack_delay(rep_msg,
2076                                     cm_id_priv->av.port->cm_dev->ack_delay);
2077         cm_rep_set_failover(rep_msg, param->failover_accepted);
2078         cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
2079         rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
2080
2081         if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
2082                 rep_msg->initiator_depth = param->initiator_depth;
2083                 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
2084                 cm_rep_set_srq(rep_msg, param->srq);
2085                 cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
2086         } else {
2087                 cm_rep_set_srq(rep_msg, 1);
2088                 cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
2089         }
2090
2091         if (param->private_data && param->private_data_len)
2092                 memcpy(rep_msg->private_data, param->private_data,
2093                        param->private_data_len);
2094 }
2095
2096 int ib_send_cm_rep(struct ib_cm_id *cm_id,
2097                    struct ib_cm_rep_param *param)
2098 {
2099         struct cm_id_private *cm_id_priv;
2100         struct ib_mad_send_buf *msg;
2101         struct cm_rep_msg *rep_msg;
2102         unsigned long flags;
2103         int ret;
2104
2105         if (param->private_data &&
2106             param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
2107                 return -EINVAL;
2108
2109         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2110         spin_lock_irqsave(&cm_id_priv->lock, flags);
2111         if (cm_id->state != IB_CM_REQ_RCVD &&
2112             cm_id->state != IB_CM_MRA_REQ_SENT) {
2113                 pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
2114                          be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2115                 ret = -EINVAL;
2116                 goto out;
2117         }
2118
2119         ret = cm_alloc_msg(cm_id_priv, &msg);
2120         if (ret)
2121                 goto out;
2122
2123         rep_msg = (struct cm_rep_msg *) msg->mad;
2124         cm_format_rep(rep_msg, cm_id_priv, param);
2125         msg->timeout_ms = cm_id_priv->timeout_ms;
2126         msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
2127
2128         ret = ib_post_send_mad(msg, NULL);
2129         if (ret) {
2130                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2131                 cm_free_msg(msg);
2132                 return ret;
2133         }
2134
2135         cm_id->state = IB_CM_REP_SENT;
2136         cm_id_priv->msg = msg;
2137         cm_id_priv->initiator_depth = param->initiator_depth;
2138         cm_id_priv->responder_resources = param->responder_resources;
2139         cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
2140         cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
2141
2142 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2143         return ret;
2144 }
2145 EXPORT_SYMBOL(ib_send_cm_rep);
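/*
 * Illustrative sketch (not part of this file): the passive side accepts
 * from its REQ event handler by replying with the parameters it is
 * willing to honour.  All names and values below are hypothetical:
 *
 *	struct ib_cm_rep_param rep = {};
 *
 *	rep.qp_num		= qp->qp_num;
 *	rep.starting_psn	= prandom_u32() & 0xffffff;
 *	rep.responder_resources	= 4;
 *	rep.initiator_depth	= 4;
 *	rep.rnr_retry_count	= 7;
 *	rep.flow_control	= 1;
 *	ret = ib_send_cm_rep(cm_id, &rep);
 */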
2146
2147 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
2148                           struct cm_id_private *cm_id_priv,
2149                           const void *private_data,
2150                           u8 private_data_len)
2151 {
2152         cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
2153         rtu_msg->local_comm_id = cm_id_priv->id.local_id;
2154         rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
2155
2156         if (private_data && private_data_len)
2157                 memcpy(rtu_msg->private_data, private_data, private_data_len);
2158 }
2159
2160 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
2161                    const void *private_data,
2162                    u8 private_data_len)
2163 {
2164         struct cm_id_private *cm_id_priv;
2165         struct ib_mad_send_buf *msg;
2166         unsigned long flags;
2167         void *data;
2168         int ret;
2169
2170         if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
2171                 return -EINVAL;
2172
2173         data = cm_copy_private_data(private_data, private_data_len);
2174         if (IS_ERR(data))
2175                 return PTR_ERR(data);
2176
2177         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2178         spin_lock_irqsave(&cm_id_priv->lock, flags);
2179         if (cm_id->state != IB_CM_REP_RCVD &&
2180             cm_id->state != IB_CM_MRA_REP_SENT) {
2181                 pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
2182                          be32_to_cpu(cm_id->local_id), cm_id->state);
2183                 ret = -EINVAL;
2184                 goto error;
2185         }
2186
2187         ret = cm_alloc_msg(cm_id_priv, &msg);
2188         if (ret)
2189                 goto error;
2190
2191         cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2192                       private_data, private_data_len);
2193
2194         ret = ib_post_send_mad(msg, NULL);
2195         if (ret) {
2196                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2197                 cm_free_msg(msg);
2198                 kfree(data);
2199                 return ret;
2200         }
2201
2202         cm_id->state = IB_CM_ESTABLISHED;
2203         cm_set_private_data(cm_id_priv, data, private_data_len);
2204         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2205         return 0;
2206
2207 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2208         kfree(data);
2209         return ret;
2210 }
2211 EXPORT_SYMBOL(ib_send_cm_rtu);
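/*
 * Illustrative sketch (not part of this file): the active side
 * completes the handshake from its REP event handler, typically after
 * moving its QP to RTR/RTS:
 *
 *	ret = ib_send_cm_rtu(cm_id, NULL, 0);
 */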
2212
2213 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
2214 {
2215         struct cm_rep_msg *rep_msg;
2216         struct ib_cm_rep_event_param *param;
2217
2218         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2219         param = &work->cm_event.param.rep_rcvd;
2220         param->remote_ca_guid = rep_msg->local_ca_guid;
2221         param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
2222         param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
2223         param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
2224         param->responder_resources = rep_msg->initiator_depth;
2225         param->initiator_depth = rep_msg->resp_resources;
2226         param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
2227         param->failover_accepted = cm_rep_get_failover(rep_msg);
2228         param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
2229         param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
2230         param->srq = cm_rep_get_srq(rep_msg);
2231         work->cm_event.private_data = &rep_msg->private_data;
2232 }
2233
2234 static void cm_dup_rep_handler(struct cm_work *work)
2235 {
2236         struct cm_id_private *cm_id_priv;
2237         struct cm_rep_msg *rep_msg;
2238         struct ib_mad_send_buf *msg = NULL;
2239         int ret;
2240
2241         rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
2242         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
2243                                    rep_msg->local_comm_id);
2244         if (!cm_id_priv)
2245                 return;
2246
2247         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2248                         counter[CM_REP_COUNTER]);
2249         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
2250         if (ret)
2251                 goto deref;
2252
2253         spin_lock_irq(&cm_id_priv->lock);
2254         if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
2255                 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2256                               cm_id_priv->private_data,
2257                               cm_id_priv->private_data_len);
2258         else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
2259                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2260                               CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
2261                               cm_id_priv->private_data,
2262                               cm_id_priv->private_data_len);
2263         else
2264                 goto unlock;
2265         spin_unlock_irq(&cm_id_priv->lock);
2266
2267         ret = ib_post_send_mad(msg, NULL);
2268         if (ret)
2269                 goto free;
2270         goto deref;
2271
2272 unlock: spin_unlock_irq(&cm_id_priv->lock);
2273 free:   cm_free_msg(msg);
2274 deref:  cm_deref_id(cm_id_priv);
2275 }
2276
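/*
 * Active-side handling of a REP: validate it against the outstanding
 * REQ, record the remote QPN, PSN and ack timeouts, and move the cm_id
 * to IB_CM_REP_RCVD before reporting the event.  Duplicate REPs and
 * stale connections are answered without involving the ULP.
 */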
2277 static int cm_rep_handler(struct cm_work *work)
2278 {
2279         struct cm_id_private *cm_id_priv;
2280         struct cm_rep_msg *rep_msg;
2281         int ret;
2282         struct cm_id_private *cur_cm_id_priv;
2283         struct ib_cm_id *cm_id;
2284         struct cm_timewait_info *timewait_info;
2285
2286         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2287         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
2288         if (!cm_id_priv) {
2289                 cm_dup_rep_handler(work);
2290                 pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
2291                          be32_to_cpu(rep_msg->remote_comm_id));
2292                 return -EINVAL;
2293         }
2294
2295         cm_format_rep_event(work, cm_id_priv->qp_type);
2296
2297         spin_lock_irq(&cm_id_priv->lock);
2298         switch (cm_id_priv->id.state) {
2299         case IB_CM_REQ_SENT:
2300         case IB_CM_MRA_REQ_RCVD:
2301                 break;
2302         default:
2303                 spin_unlock_irq(&cm_id_priv->lock);
2304                 ret = -EINVAL;
2305                 pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
2306                          __func__, cm_id_priv->id.state,
2307                          be32_to_cpu(rep_msg->local_comm_id),
2308                          be32_to_cpu(rep_msg->remote_comm_id));
2309                 goto error;
2310         }
2311
2312         cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
2313         cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
2314         cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2315
2316         spin_lock(&cm.lock);
2317         /* Check for duplicate REP. */
2318         if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
2319                 spin_unlock(&cm.lock);
2320                 spin_unlock_irq(&cm_id_priv->lock);
2321                 ret = -EINVAL;
2322                 pr_debug("%s: Failed to insert remote id %d\n", __func__,
2323                          be32_to_cpu(rep_msg->remote_comm_id));
2324                 goto error;
2325         }
2326         /* Check for a stale connection. */
2327         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
2328         if (timewait_info) {
2329                 rb_erase(&cm_id_priv->timewait_info->remote_id_node,
2330                          &cm.remote_id_table);
2331                 cm_id_priv->timewait_info->inserted_remote_id = 0;
2332                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
2333                                            timewait_info->work.remote_id);
2334
2335                 spin_unlock(&cm.lock);
2336                 spin_unlock_irq(&cm_id_priv->lock);
2337                 cm_issue_rej(work->port, work->mad_recv_wc,
2338                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
2339                              NULL, 0);
2340                 ret = -EINVAL;
2341                 pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
2342                          __func__, be32_to_cpu(rep_msg->local_comm_id),
2343                          be32_to_cpu(rep_msg->remote_comm_id));
2344
2345                 if (cur_cm_id_priv) {
2346                         cm_id = &cur_cm_id_priv->id;
2347                         ib_send_cm_dreq(cm_id, NULL, 0);
2348                         cm_deref_id(cur_cm_id_priv);
2349                 }
2350
2351                 goto error;
2352         }
2353         spin_unlock(&cm.lock);
2354
2355         cm_id_priv->id.state = IB_CM_REP_RCVD;
2356         cm_id_priv->id.remote_id = rep_msg->local_comm_id;
2357         cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2358         cm_id_priv->initiator_depth = rep_msg->resp_resources;
2359         cm_id_priv->responder_resources = rep_msg->initiator_depth;
2360         cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
2361         cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
2362         cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
2363         cm_id_priv->av.timeout =
2364                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2365                                        cm_id_priv->av.timeout - 1);
2366         cm_id_priv->alt_av.timeout =
2367                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2368                                        cm_id_priv->alt_av.timeout - 1);
2369
2370         /* todo: handle peer_to_peer */
2371
2372         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2373         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2374         if (!ret)
2375                 list_add_tail(&work->list, &cm_id_priv->work_list);
2376         spin_unlock_irq(&cm_id_priv->lock);
2377
2378         if (ret)
2379                 cm_process_work(cm_id_priv, work);
2380         else
2381                 cm_deref_id(cm_id_priv);
2382         return 0;
2383
2384 error:
2385         cm_deref_id(cm_id_priv);
2386         return ret;
2387 }
2388
2389 static int cm_establish_handler(struct cm_work *work)
2390 {
2391         struct cm_id_private *cm_id_priv;
2392         int ret;
2393
2394         /* See comment in cm_establish about lookup. */
2395         cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
2396         if (!cm_id_priv)
2397                 return -EINVAL;
2398
2399         spin_lock_irq(&cm_id_priv->lock);
2400         if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
2401                 spin_unlock_irq(&cm_id_priv->lock);
2402                 goto out;
2403         }
2404
2405         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2406         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2407         if (!ret)
2408                 list_add_tail(&work->list, &cm_id_priv->work_list);
2409         spin_unlock_irq(&cm_id_priv->lock);
2410
2411         if (ret)
2412                 cm_process_work(cm_id_priv, work);
2413         else
2414                 cm_deref_id(cm_id_priv);
2415         return 0;
2416 out:
2417         cm_deref_id(cm_id_priv);
2418         return -EINVAL;
2419 }
2420
2421 static int cm_rtu_handler(struct cm_work *work)
2422 {
2423         struct cm_id_private *cm_id_priv;
2424         struct cm_rtu_msg *rtu_msg;
2425         int ret;
2426
2427         rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
2428         cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
2429                                    rtu_msg->local_comm_id);
2430         if (!cm_id_priv)
2431                 return -EINVAL;
2432
2433         work->cm_event.private_data = &rtu_msg->private_data;
2434
2435         spin_lock_irq(&cm_id_priv->lock);
2436         if (cm_id_priv->id.state != IB_CM_REP_SENT &&
2437             cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
2438                 spin_unlock_irq(&cm_id_priv->lock);
2439                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2440                                 counter[CM_RTU_COUNTER]);
2441                 goto out;
2442         }
2443         cm_id_priv->id.state = IB_CM_ESTABLISHED;
2444
2445         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2446         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2447         if (!ret)
2448                 list_add_tail(&work->list, &cm_id_priv->work_list);
2449         spin_unlock_irq(&cm_id_priv->lock);
2450
2451         if (ret)
2452                 cm_process_work(cm_id_priv, work);
2453         else
2454                 cm_deref_id(cm_id_priv);
2455         return 0;
2456 out:
2457         cm_deref_id(cm_id_priv);
2458         return -EINVAL;
2459 }
2460
2461 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2462                           struct cm_id_private *cm_id_priv,
2463                           const void *private_data,
2464                           u8 private_data_len)
2465 {
2466         cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2467                           cm_form_tid(cm_id_priv));
2468         dreq_msg->local_comm_id = cm_id_priv->id.local_id;
2469         dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
2470         cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
2471
2472         if (private_data && private_data_len)
2473                 memcpy(dreq_msg->private_data, private_data, private_data_len);
2474 }
2475
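/*
 * Initiate disconnect: only legal in IB_CM_ESTABLISHED.  If the DREQ
 * cannot be allocated or posted, the cm_id is moved straight into
 * timewait rather than being left half torn down.
 */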
2476 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2477                     const void *private_data,
2478                     u8 private_data_len)
2479 {
2480         struct cm_id_private *cm_id_priv;
2481         struct ib_mad_send_buf *msg;
2482         unsigned long flags;
2483         int ret;
2484
2485         if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2486                 return -EINVAL;
2487
2488         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2489         spin_lock_irqsave(&cm_id_priv->lock, flags);
2490         if (cm_id->state != IB_CM_ESTABLISHED) {
2491                 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2492                          be32_to_cpu(cm_id->local_id), cm_id->state);
2493                 ret = -EINVAL;
2494                 goto out;
2495         }
2496
2497         if (cm_id->lap_state == IB_CM_LAP_SENT ||
2498             cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2499                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2500
2501         ret = cm_alloc_msg(cm_id_priv, &msg);
2502         if (ret) {
2503                 cm_enter_timewait(cm_id_priv);
2504                 goto out;
2505         }
2506
2507         cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2508                        private_data, private_data_len);
2509         msg->timeout_ms = cm_id_priv->timeout_ms;
2510         msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2511
2512         ret = ib_post_send_mad(msg, NULL);
2513         if (ret) {
2514                 cm_enter_timewait(cm_id_priv);
2515                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2516                 cm_free_msg(msg);
2517                 return ret;
2518         }
2519
2520         cm_id->state = IB_CM_DREQ_SENT;
2521         cm_id_priv->msg = msg;
2522 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2523         return ret;
2524 }
2525 EXPORT_SYMBOL(ib_send_cm_dreq);
2526
2527 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2528                           struct cm_id_private *cm_id_priv,
2529                           const void *private_data,
2530                           u8 private_data_len)
2531 {
2532         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2533         drep_msg->local_comm_id = cm_id_priv->id.local_id;
2534         drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2535
2536         if (private_data && private_data_len)
2537                 memcpy(drep_msg->private_data, private_data, private_data_len);
2538 }
2539
2540 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2541                     const void *private_data,
2542                     u8 private_data_len)
2543 {
2544         struct cm_id_private *cm_id_priv;
2545         struct ib_mad_send_buf *msg;
2546         unsigned long flags;
2547         void *data;
2548         int ret;
2549
2550         if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2551                 return -EINVAL;
2552
2553         data = cm_copy_private_data(private_data, private_data_len);
2554         if (IS_ERR(data))
2555                 return PTR_ERR(data);
2556
2557         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2558         spin_lock_irqsave(&cm_id_priv->lock, flags);
2559         if (cm_id->state != IB_CM_DREQ_RCVD) {
2560                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2561                 kfree(data);
2562                 pr_debug("%s: local_id %d, cm_id->state(%d) != IB_CM_DREQ_RCVD\n",
2563                          __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
2564                 return -EINVAL;
2565         }
2566
2567         cm_set_private_data(cm_id_priv, data, private_data_len);
2568         cm_enter_timewait(cm_id_priv);
2569
2570         ret = cm_alloc_msg(cm_id_priv, &msg);
2571         if (ret)
2572                 goto out;
2573
2574         cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2575                        private_data, private_data_len);
2576
2577         ret = ib_post_send_mad(msg, NULL);
2578         if (ret) {
2579                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2580                 cm_free_msg(msg);
2581                 return ret;
2582         }
2583
2584 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2585         return ret;
2586 }
2587 EXPORT_SYMBOL(ib_send_cm_drep);
2588
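/*
 * Answer a DREQ with a DREP built straight from the received MAD when
 * no matching cm_id exists; the connection is already gone locally, but
 * the peer still needs a reply to stop retransmitting.
 */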
2589 static int cm_issue_drep(struct cm_port *port,
2590                          struct ib_mad_recv_wc *mad_recv_wc)
2591 {
2592         struct ib_mad_send_buf *msg = NULL;
2593         struct cm_dreq_msg *dreq_msg;
2594         struct cm_drep_msg *drep_msg;
2595         int ret;
2596
2597         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2598         if (ret)
2599                 return ret;
2600
2601         dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2602         drep_msg = (struct cm_drep_msg *) msg->mad;
2603
2604         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2605         drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2606         drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2607
2608         ret = ib_post_send_mad(msg, NULL);
2609         if (ret)
2610                 cm_free_msg(msg);
2611
2612         return ret;
2613 }
2614
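/*
 * Handle a received DREQ: cancel any MAD still being retried, answer a
 * retransmitted DREQ that arrives in timewait with a fresh DREP, and
 * otherwise move to IB_CM_DREQ_RCVD and report the event to the ULP.
 */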
2615 static int cm_dreq_handler(struct cm_work *work)
2616 {
2617         struct cm_id_private *cm_id_priv;
2618         struct cm_dreq_msg *dreq_msg;
2619         struct ib_mad_send_buf *msg = NULL;
2620         int ret;
2621
2622         dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2623         cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2624                                    dreq_msg->local_comm_id);
2625         if (!cm_id_priv) {
2626                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2627                                 counter[CM_DREQ_COUNTER]);
2628                 cm_issue_drep(work->port, work->mad_recv_wc);
2629                 pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
2630                          __func__, be32_to_cpu(dreq_msg->local_comm_id),
2631                          be32_to_cpu(dreq_msg->remote_comm_id));
2632                 return -EINVAL;
2633         }
2634
2635         work->cm_event.private_data = &dreq_msg->private_data;
2636
2637         spin_lock_irq(&cm_id_priv->lock);
2638         if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2639                 goto unlock;
2640
2641         switch (cm_id_priv->id.state) {
2642         case IB_CM_REP_SENT:
2643         case IB_CM_DREQ_SENT:
2644                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2645                 break;
2646         case IB_CM_ESTABLISHED:
2647                 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2648                     cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2649                         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2650                 break;
2651         case IB_CM_MRA_REP_RCVD:
2652                 break;
2653         case IB_CM_TIMEWAIT:
2654                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2655                                 counter[CM_DREQ_COUNTER]);
2656                 msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
2657                 if (IS_ERR(msg))
2658                         goto unlock;
2659
2660                 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2661                                cm_id_priv->private_data,
2662                                cm_id_priv->private_data_len);
2663                 spin_unlock_irq(&cm_id_priv->lock);
2664
2665                 if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
2666                     ib_post_send_mad(msg, NULL))
2667                         cm_free_msg(msg);
2668                 goto deref;
2669         case IB_CM_DREQ_RCVD:
2670                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2671                                 counter[CM_DREQ_COUNTER]);
2672                 goto unlock;
2673         default:
2674                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2675                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
2676                          cm_id_priv->id.state);
2677                 goto unlock;
2678         }
2679         cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2680         cm_id_priv->tid = dreq_msg->hdr.tid;
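        /*
         * work_count starts at -1, so atomic_inc_and_test() succeeds
         * only when no other work is outstanding; otherwise the item
         * is queued on work_list and drained by whichever thread is
         * already processing this cm_id (see cm_process_work()).
         */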
2681         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2682         if (!ret)
2683                 list_add_tail(&work->list, &cm_id_priv->work_list);
2684         spin_unlock_irq(&cm_id_priv->lock);
2685
2686         if (ret)
2687                 cm_process_work(cm_id_priv, work);
2688         else
2689                 cm_deref_id(cm_id_priv);
2690         return 0;
2691
2692 unlock: spin_unlock_irq(&cm_id_priv->lock);
2693 deref:  cm_deref_id(cm_id_priv);
2694         return -EINVAL;
2695 }
2696
2697 static int cm_drep_handler(struct cm_work *work)
2698 {
2699         struct cm_id_private *cm_id_priv;
2700         struct cm_drep_msg *drep_msg;
2701         int ret;
2702
2703         drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2704         cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2705                                    drep_msg->local_comm_id);
2706         if (!cm_id_priv)
2707                 return -EINVAL;
2708
2709         work->cm_event.private_data = &drep_msg->private_data;
2710
2711         spin_lock_irq(&cm_id_priv->lock);
2712         if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2713             cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2714                 spin_unlock_irq(&cm_id_priv->lock);
2715                 goto out;
2716         }
2717         cm_enter_timewait(cm_id_priv);
2718
2719         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2720         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2721         if (!ret)
2722                 list_add_tail(&work->list, &cm_id_priv->work_list);
2723         spin_unlock_irq(&cm_id_priv->lock);
2724
2725         if (ret)
2726                 cm_process_work(cm_id_priv, work);
2727         else
2728                 cm_deref_id(cm_id_priv);
2729         return 0;
2730 out:
2731         cm_deref_id(cm_id_priv);
2732         return -EINVAL;
2733 }
2734
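/*
 * Minimal usage sketch (illustrative only, not taken from this file):
 * a passive-side cm_handler that declines every connection request
 * might do
 *
 *	if (event->event == IB_CM_REQ_RECEIVED) {
 *		ib_send_cm_rej(cm_id, IB_CM_REJ_NO_RESOURCES,
 *			       NULL, 0, NULL, 0);
 *		return -ECONNREFUSED;
 *	}
 *
 * A non-zero return from the handler makes the CM destroy the id.
 * The ARI and private data are optional and bounded by
 * IB_CM_REJ_ARI_LENGTH and IB_CM_REJ_PRIVATE_DATA_SIZE.
 */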
2735 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2736                    enum ib_cm_rej_reason reason,
2737                    void *ari,
2738                    u8 ari_length,
2739                    const void *private_data,
2740                    u8 private_data_len)
2741 {
2742         struct cm_id_private *cm_id_priv;
2743         struct ib_mad_send_buf *msg;
2744         unsigned long flags;
2745         int ret;
2746
2747         if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2748             (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2749                 return -EINVAL;
2750
2751         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2752
2753         spin_lock_irqsave(&cm_id_priv->lock, flags);
2754         switch (cm_id->state) {
2755         case IB_CM_REQ_SENT:
2756         case IB_CM_MRA_REQ_RCVD:
2757         case IB_CM_REQ_RCVD:
2758         case IB_CM_MRA_REQ_SENT:
2759         case IB_CM_REP_RCVD:
2760         case IB_CM_MRA_REP_SENT:
2761                 ret = cm_alloc_msg(cm_id_priv, &msg);
2762                 if (!ret)
2763                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2764                                       cm_id_priv, reason, ari, ari_length,
2765                                       private_data, private_data_len);
2766
2767                 cm_reset_to_idle(cm_id_priv);
2768                 break;
2769         case IB_CM_REP_SENT:
2770         case IB_CM_MRA_REP_RCVD:
2771                 ret = cm_alloc_msg(cm_id_priv, &msg);
2772                 if (!ret)
2773                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2774                                       cm_id_priv, reason, ari, ari_length,
2775                                       private_data, private_data_len);
2776
2777                 cm_enter_timewait(cm_id_priv);
2778                 break;
2779         default:
2780                 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2781                          be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2782                 ret = -EINVAL;
2783                 goto out;
2784         }
2785
2786         if (ret)
2787                 goto out;
2788
2789         ret = ib_post_send_mad(msg, NULL);
2790         if (ret)
2791                 cm_free_msg(msg);
2792
2793 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2794         return ret;
2795 }
2796 EXPORT_SYMBOL(ib_send_cm_rej);
2797
2798 static void cm_format_rej_event(struct cm_work *work)
2799 {
2800         struct cm_rej_msg *rej_msg;
2801         struct ib_cm_rej_event_param *param;
2802
2803         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2804         param = &work->cm_event.param.rej_rcvd;
2805         param->ari = rej_msg->ari;
2806         param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2807         param->reason = __be16_to_cpu(rej_msg->reason);
2808         work->cm_event.private_data = &rej_msg->private_data;
2809 }
2810
2811 static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2812 {
2813         struct cm_timewait_info *timewait_info;
2814         struct cm_id_private *cm_id_priv;
2815         __be32 remote_id;
2816
2817         remote_id = rej_msg->local_comm_id;
2818
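        /*
         * A timeout REJ can refer to a connection we only know by its
         * timewait entry.  The first eight bytes of the ARI carry the
         * CA GUID that, together with the comm ID, locates that entry;
         * the XOR with cm.random_id_operand below undoes the
         * scrambling applied when local IDs are handed out.
         */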
2819         if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2820                 spin_lock_irq(&cm.lock);
2821                 timewait_info = cm_find_remote_id(*((__be64 *)rej_msg->ari),
2822                                                   remote_id);
2823                 if (!timewait_info) {
2824                         spin_unlock_irq(&cm.lock);
2825                         return NULL;
2826                 }
2827                 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2828                                       (timewait_info->work.local_id ^
2829                                        cm.random_id_operand));
2830                 if (cm_id_priv) {
2831                         if (cm_id_priv->id.remote_id == remote_id)
2832                                 atomic_inc(&cm_id_priv->refcount);
2833                         else
2834                                 cm_id_priv = NULL;
2835                 }
2836                 spin_unlock_irq(&cm.lock);
2837         } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2838                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2839         else
2840                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2841
2842         return cm_id_priv;
2843 }
2844
2845 static int cm_rej_handler(struct cm_work *work)
2846 {
2847         struct cm_id_private *cm_id_priv;
2848         struct cm_rej_msg *rej_msg;
2849         int ret;
2850
2851         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2852         cm_id_priv = cm_acquire_rejected_id(rej_msg);
2853         if (!cm_id_priv)
2854                 return -EINVAL;
2855
2856         cm_format_rej_event(work);
2857
2858         spin_lock_irq(&cm_id_priv->lock);
2859         switch (cm_id_priv->id.state) {
2860         case IB_CM_REQ_SENT:
2861         case IB_CM_MRA_REQ_RCVD:
2862         case IB_CM_REP_SENT:
2863         case IB_CM_MRA_REP_RCVD:
2864                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2865                 /* fall through */
2866         case IB_CM_REQ_RCVD:
2867         case IB_CM_MRA_REQ_SENT:
2868                 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2869                         cm_enter_timewait(cm_id_priv);
2870                 else
2871                         cm_reset_to_idle(cm_id_priv);
2872                 break;
2873         case IB_CM_DREQ_SENT:
2874                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2875                 /* fall through */
2876         case IB_CM_REP_RCVD:
2877         case IB_CM_MRA_REP_SENT:
2878                 cm_enter_timewait(cm_id_priv);
2879                 break;
2880         case IB_CM_ESTABLISHED:
2881                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2882                     cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2883                         if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2884                                 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2885                                               cm_id_priv->msg);
2886                         cm_enter_timewait(cm_id_priv);
2887                         break;
2888                 }
2889                 /* fall through */
2890         default:
2891                 spin_unlock_irq(&cm_id_priv->lock);
2892                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2893                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
2894                          cm_id_priv->id.state);
2895                 ret = -EINVAL;
2896                 goto out;
2897         }
2898
2899         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2900         if (!ret)
2901                 list_add_tail(&work->list, &cm_id_priv->work_list);
2902         spin_unlock_irq(&cm_id_priv->lock);
2903
2904         if (ret)
2905                 cm_process_work(cm_id_priv, work);
2906         else
2907                 cm_deref_id(cm_id_priv);
2908         return 0;
2909 out:
2910         cm_deref_id(cm_id_priv);
2911         return -EINVAL;
2912 }
2913
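/*
 * Note: service_timeout is a 5-bit exponential encoding.  OR-ing in
 * IB_CM_MRA_FLAG_DELAY updates the local state and recorded timeout
 * without placing an MRA on the wire.  Illustrative call (the
 * timeout value 24 is an arbitrary example):
 *
 *	ib_send_cm_mra(cm_id, 24 | IB_CM_MRA_FLAG_DELAY, NULL, 0);
 */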
2914 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2915                    u8 service_timeout,
2916                    const void *private_data,
2917                    u8 private_data_len)
2918 {
2919         struct cm_id_private *cm_id_priv;
2920         struct ib_mad_send_buf *msg;
2921         enum ib_cm_state cm_state;
2922         enum ib_cm_lap_state lap_state;
2923         enum cm_msg_response msg_response;
2924         void *data;
2925         unsigned long flags;
2926         int ret;
2927
2928         if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2929                 return -EINVAL;
2930
2931         data = cm_copy_private_data(private_data, private_data_len);
2932         if (IS_ERR(data))
2933                 return PTR_ERR(data);
2934
2935         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2936
2937         spin_lock_irqsave(&cm_id_priv->lock, flags);
2938         switch (cm_id_priv->id.state) {
2939         case IB_CM_REQ_RCVD:
2940                 cm_state = IB_CM_MRA_REQ_SENT;
2941                 lap_state = cm_id->lap_state;
2942                 msg_response = CM_MSG_RESPONSE_REQ;
2943                 break;
2944         case IB_CM_REP_RCVD:
2945                 cm_state = IB_CM_MRA_REP_SENT;
2946                 lap_state = cm_id->lap_state;
2947                 msg_response = CM_MSG_RESPONSE_REP;
2948                 break;
2949         case IB_CM_ESTABLISHED:
2950                 if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2951                         cm_state = cm_id->state;
2952                         lap_state = IB_CM_MRA_LAP_SENT;
2953                         msg_response = CM_MSG_RESPONSE_OTHER;
2954                         break;
2955                 }
2956                 /* fall through */
2957         default:
2958                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2959                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
2960                          cm_id_priv->id.state);
2961                 ret = -EINVAL;
2962                 goto error1;
2963         }
2964
2965         if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2966                 ret = cm_alloc_msg(cm_id_priv, &msg);
2967                 if (ret)
2968                         goto error1;
2969
2970                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2971                               msg_response, service_timeout,
2972                               private_data, private_data_len);
2973                 ret = ib_post_send_mad(msg, NULL);
2974                 if (ret)
2975                         goto error2;
2976         }
2977
2978         cm_id->state = cm_state;
2979         cm_id->lap_state = lap_state;
2980         cm_id_priv->service_timeout = service_timeout;
2981         cm_set_private_data(cm_id_priv, data, private_data_len);
2982         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2983         return 0;
2984
2985 error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2986         kfree(data);
2987         return ret;
2988
2989 error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2990         kfree(data);
2991         cm_free_msg(msg);
2992         return ret;
2993 }
2994 EXPORT_SYMBOL(ib_send_cm_mra);
2995
2996 static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2997 {
2998         switch (cm_mra_get_msg_mraed(mra_msg)) {
2999         case CM_MSG_RESPONSE_REQ:
3000                 return cm_acquire_id(mra_msg->remote_comm_id, 0);
3001         case CM_MSG_RESPONSE_REP:
3002         case CM_MSG_RESPONSE_OTHER:
3003                 return cm_acquire_id(mra_msg->remote_comm_id,
3004                                      mra_msg->local_comm_id);
3005         default:
3006                 return NULL;
3007         }
3008 }
3009
3010 static int cm_mra_handler(struct cm_work *work)
3011 {
3012         struct cm_id_private *cm_id_priv;
3013         struct cm_mra_msg *mra_msg;
3014         int timeout, ret;
3015
3016         mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
3017         cm_id_priv = cm_acquire_mraed_id(mra_msg);
3018         if (!cm_id_priv)
3019                 return -EINVAL;
3020
3021         work->cm_event.private_data = &mra_msg->private_data;
3022         work->cm_event.param.mra_rcvd.service_timeout =
3023                                         cm_mra_get_service_timeout(mra_msg);
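        /*
         * Budget for the peer's advertised service timeout plus our
         * own path timeout; both are 5-bit exponents that
         * cm_convert_to_ms() expands (roughly 4.096 us << value)
         * before ib_modify_mad() extends the outstanding send.
         */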
3024         timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
3025                   cm_convert_to_ms(cm_id_priv->av.timeout);
3026
3027         spin_lock_irq(&cm_id_priv->lock);
3028         switch (cm_id_priv->id.state) {
3029         case IB_CM_REQ_SENT:
3030                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
3031                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
3032                                   cm_id_priv->msg, timeout))
3033                         goto out;
3034                 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
3035                 break;
3036         case IB_CM_REP_SENT:
3037                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
3038                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
3039                                   cm_id_priv->msg, timeout))
3040                         goto out;
3041                 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
3042                 break;
3043         case IB_CM_ESTABLISHED:
3044                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
3045                     cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
3046                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
3047                                   cm_id_priv->msg, timeout)) {
3048                         if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
3049                                 atomic_long_inc(&work->port->
3050                                                 counter_group[CM_RECV_DUPLICATES].
3051                                                 counter[CM_MRA_COUNTER]);
3052                         goto out;
3053                 }
3054                 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
3055                 break;
3056         case IB_CM_MRA_REQ_RCVD:
3057         case IB_CM_MRA_REP_RCVD:
3058                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3059                                 counter[CM_MRA_COUNTER]);
3060                 /* fall through */
3061         default:
3062                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
3063                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
3064                          cm_id_priv->id.state);
3065                 goto out;
3066         }
3067
3068         cm_id_priv->msg->context[1] = (void *) (unsigned long)
3069                                       cm_id_priv->id.state;
3070         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3071         if (!ret)
3072                 list_add_tail(&work->list, &cm_id_priv->work_list);
3073         spin_unlock_irq(&cm_id_priv->lock);
3074
3075         if (ret)
3076                 cm_process_work(cm_id_priv, work);
3077         else
3078                 cm_deref_id(cm_id_priv);
3079         return 0;
3080 out:
3081         spin_unlock_irq(&cm_id_priv->lock);
3082         cm_deref_id(cm_id_priv);
3083         return -EINVAL;
3084 }
3085
3086 static void cm_format_lap(struct cm_lap_msg *lap_msg,
3087                           struct cm_id_private *cm_id_priv,
3088                           struct sa_path_rec *alternate_path,
3089                           const void *private_data,
3090                           u8 private_data_len)
3091 {
3092         bool alt_ext = false;
3093
3094         if (alternate_path->rec_type == SA_PATH_REC_TYPE_OPA)
3095                 alt_ext = opa_is_extended_lid(alternate_path->opa.dlid,
3096                                               alternate_path->opa.slid);
3097         cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
3098                           cm_form_tid(cm_id_priv));
3099         lap_msg->local_comm_id = cm_id_priv->id.local_id;
3100         lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
3101         cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
3102         /* todo: need remote CM response timeout; 0x1F = max 5-bit value */
3103         cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
3104         lap_msg->alt_local_lid =
3105                 htons(ntohl(sa_path_get_slid(alternate_path)));
3106         lap_msg->alt_remote_lid =
3107                 htons(ntohl(sa_path_get_dlid(alternate_path)));
3108         lap_msg->alt_local_gid = alternate_path->sgid;
3109         lap_msg->alt_remote_gid = alternate_path->dgid;
3110         if (alt_ext) {
3111                 lap_msg->alt_local_gid.global.interface_id
3112                         = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.slid));
3113                 lap_msg->alt_remote_gid.global.interface_id
3114                         = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.dlid));
3115         }
3116         cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
3117         cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
3118         lap_msg->alt_hop_limit = alternate_path->hop_limit;
3119         cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
3120         cm_lap_set_sl(lap_msg, alternate_path->sl);
3121         cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
3122         cm_lap_set_local_ack_timeout(lap_msg,
3123                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
3124                                alternate_path->packet_life_time));
3125
3126         if (private_data && private_data_len)
3127                 memcpy(lap_msg->private_data, private_data, private_data_len);
3128 }
3129
3130 int ib_send_cm_lap(struct ib_cm_id *cm_id,
3131                    struct sa_path_rec *alternate_path,
3132                    const void *private_data,
3133                    u8 private_data_len)
3134 {
3135         struct cm_id_private *cm_id_priv;
3136         struct ib_mad_send_buf *msg;
3137         unsigned long flags;
3138         int ret;
3139
3140         if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
3141                 return -EINVAL;
3142
3143         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3144         spin_lock_irqsave(&cm_id_priv->lock, flags);
3145         if (cm_id->state != IB_CM_ESTABLISHED ||
3146             (cm_id->lap_state != IB_CM_LAP_UNINIT &&
3147              cm_id->lap_state != IB_CM_LAP_IDLE)) {
3148                 ret = -EINVAL;
3149                 goto out;
3150         }
3151
3152         ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
3153                                  cm_id_priv);
3154         if (ret)
3155                 goto out;
3156         cm_id_priv->alt_av.timeout =
3157                         cm_ack_timeout(cm_id_priv->target_ack_delay,
3158                                        cm_id_priv->alt_av.timeout - 1);
3159
3160         ret = cm_alloc_msg(cm_id_priv, &msg);
3161         if (ret)
3162                 goto out;
3163
3164         cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
3165                       alternate_path, private_data, private_data_len);
3166         msg->timeout_ms = cm_id_priv->timeout_ms;
3167         msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
3168
3169         ret = ib_post_send_mad(msg, NULL);
3170         if (ret) {
3171                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3172                 cm_free_msg(msg);
3173                 return ret;
3174         }
3175
3176         cm_id->lap_state = IB_CM_LAP_SENT;
3177         cm_id_priv->msg = msg;
3178
3179 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3180         return ret;
3181 }
3182 EXPORT_SYMBOL(ib_send_cm_lap);
3183
3184 static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
3185                                         struct sa_path_rec *path)
3186 {
3187         u32 lid;
3188
3189         if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
3190                 sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid));
3191                 sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid));
3192         } else {
3193                 lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid);
3194                 sa_path_set_dlid(path, lid);
3195
3196                 lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid);
3197                 sa_path_set_slid(path, lid);
3198         }
3199 }
3200
3201 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
3202                                     struct sa_path_rec *path,
3203                                     struct cm_lap_msg *lap_msg)
3204 {
3205         path->dgid = lap_msg->alt_local_gid;
3206         path->sgid = lap_msg->alt_remote_gid;
3207         path->flow_label = cm_lap_get_flow_label(lap_msg);
3208         path->hop_limit = lap_msg->alt_hop_limit;
3209         path->traffic_class = cm_lap_get_traffic_class(lap_msg);
3210         path->reversible = 1;
3211         path->pkey = cm_id_priv->pkey;
3212         path->sl = cm_lap_get_sl(lap_msg);
3213         path->mtu_selector = IB_SA_EQ;
3214         path->mtu = cm_id_priv->path_mtu;
3215         path->rate_selector = IB_SA_EQ;
3216         path->rate = cm_lap_get_packet_rate(lap_msg);
3217         path->packet_life_time_selector = IB_SA_EQ;
3218         path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
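        /*
         * The local ack timeout encodes packet life time + 1, so
         * subtract the extra 1 here, clamping at zero.
         */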
3219         path->packet_life_time -= (path->packet_life_time > 0);
3220         cm_format_path_lid_from_lap(lap_msg, path);
3221 }
3222
3223 static int cm_lap_handler(struct cm_work *work)
3224 {
3225         struct cm_id_private *cm_id_priv;
3226         struct cm_lap_msg *lap_msg;
3227         struct ib_cm_lap_event_param *param;
3228         struct ib_mad_send_buf *msg = NULL;
3229         int ret;
3230
3231         /* Alternate path messages are not currently supported for the
3232          * RoCE link layer.
3233          */
3234         if (rdma_protocol_roce(work->port->cm_dev->ib_device,
3235                                work->port->port_num))
3236                 return -EINVAL;
3237
3238         /* todo: verify LAP request and send reject APR if invalid. */
3239         lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
3240         cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
3241                                    lap_msg->local_comm_id);
3242         if (!cm_id_priv)
3243                 return -EINVAL;
3244
3245         param = &work->cm_event.param.lap_rcvd;
3246         memset(&work->path[0], 0, sizeof(work->path[0]));
3247         cm_path_set_rec_type(work->port->cm_dev->ib_device,
3248                              work->port->port_num,
3249                              &work->path[0],
3250                              &lap_msg->alt_local_gid);
3251         param->alternate_path = &work->path[0];
3252         cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
3253         work->cm_event.private_data = &lap_msg->private_data;
3254
3255         spin_lock_irq(&cm_id_priv->lock);
3256         if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
3257                 goto unlock;
3258
3259         switch (cm_id_priv->id.lap_state) {
3260         case IB_CM_LAP_UNINIT:
3261         case IB_CM_LAP_IDLE:
3262                 break;
3263         case IB_CM_MRA_LAP_SENT:
3264                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3265                                 counter[CM_LAP_COUNTER]);
3266                 msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
3267                 if (IS_ERR(msg))
3268                         goto unlock;
3269
3270                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
3271                               CM_MSG_RESPONSE_OTHER,
3272                               cm_id_priv->service_timeout,
3273                               cm_id_priv->private_data,
3274                               cm_id_priv->private_data_len);
3275                 spin_unlock_irq(&cm_id_priv->lock);
3276
3277                 if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
3278                     ib_post_send_mad(msg, NULL))
3279                         cm_free_msg(msg);
3280                 goto deref;
3281         case IB_CM_LAP_RCVD:
3282                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3283                                 counter[CM_LAP_COUNTER]);
3284                 goto unlock;
3285         default:
3286                 goto unlock;
3287         }
3288
3289         ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
3290                                  work->mad_recv_wc->recv_buf.grh,
3291                                  &cm_id_priv->av);
3292         if (ret)
3293                 goto unlock;
3294
3295         ret = cm_init_av_by_path(param->alternate_path, NULL,
3296                                  &cm_id_priv->alt_av, cm_id_priv);
        if (ret)
                goto unlock;
3297         cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
3298         cm_id_priv->tid = lap_msg->hdr.tid;
3299         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3300         if (!ret)
3301                 list_add_tail(&work->list, &cm_id_priv->work_list);
3302         spin_unlock_irq(&cm_id_priv->lock);
3303
3304         if (ret)
3305                 cm_process_work(cm_id_priv, work);
3306         else
3307                 cm_deref_id(cm_id_priv);
3308         return 0;
3309
3310 unlock: spin_unlock_irq(&cm_id_priv->lock);
3311 deref:  cm_deref_id(cm_id_priv);
3312         return -EINVAL;
3313 }
3314
3315 static void cm_format_apr(struct cm_apr_msg *apr_msg,
3316                           struct cm_id_private *cm_id_priv,
3317                           enum ib_cm_apr_status status,
3318                           void *info,
3319                           u8 info_length,
3320                           const void *private_data,
3321                           u8 private_data_len)
3322 {
3323         cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
3324         apr_msg->local_comm_id = cm_id_priv->id.local_id;
3325         apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
3326         apr_msg->ap_status = (u8) status;
3327
3328         if (info && info_length) {
3329                 apr_msg->info_length = info_length;
3330                 memcpy(apr_msg->info, info, info_length);
3331         }
3332
3333         if (private_data && private_data_len)
3334                 memcpy(apr_msg->private_data, private_data, private_data_len);
3335 }
3336
3337 int ib_send_cm_apr(struct ib_cm_id *cm_id,
3338                    enum ib_cm_apr_status status,
3339                    void *info,
3340                    u8 info_length,
3341                    const void *private_data,
3342                    u8 private_data_len)
3343 {
3344         struct cm_id_private *cm_id_priv;
3345         struct ib_mad_send_buf *msg;
3346         unsigned long flags;
3347         int ret;
3348
3349         if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
3350             (info && info_length > IB_CM_APR_INFO_LENGTH))
3351                 return -EINVAL;
3352
3353         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3354         spin_lock_irqsave(&cm_id_priv->lock, flags);
3355         if (cm_id->state != IB_CM_ESTABLISHED ||
3356             (cm_id->lap_state != IB_CM_LAP_RCVD &&
3357              cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
3358                 ret = -EINVAL;
3359                 goto out;
3360         }
3361
3362         ret = cm_alloc_msg(cm_id_priv, &msg);
3363         if (ret)
3364                 goto out;
3365
3366         cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
3367                       info, info_length, private_data, private_data_len);
3368         ret = ib_post_send_mad(msg, NULL);
3369         if (ret) {
3370                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3371                 cm_free_msg(msg);
3372                 return ret;
3373         }
3374
3375         cm_id->lap_state = IB_CM_LAP_IDLE;
3376 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3377         return ret;
3378 }
3379 EXPORT_SYMBOL(ib_send_cm_apr);
3380
3381 static int cm_apr_handler(struct cm_work *work)
3382 {
3383         struct cm_id_private *cm_id_priv;
3384         struct cm_apr_msg *apr_msg;
3385         int ret;
3386
3387         /* Alternate path messages are not currently supported for the
3388          * RoCE link layer.
3389          */
3390         if (rdma_protocol_roce(work->port->cm_dev->ib_device,
3391                                work->port->port_num))
3392                 return -EINVAL;
3393
3394         apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
3395         cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
3396                                    apr_msg->local_comm_id);
3397         if (!cm_id_priv)
3398                 return -EINVAL; /* Unmatched reply. */
3399
3400         work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
3401         work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
3402         work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
3403         work->cm_event.private_data = &apr_msg->private_data;
3404
3405         spin_lock_irq(&cm_id_priv->lock);
3406         if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
3407             (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
3408              cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
3409                 spin_unlock_irq(&cm_id_priv->lock);
3410                 goto out;
3411         }
3412         cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
3413         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3414         cm_id_priv->msg = NULL;
3415
3416         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3417         if (!ret)
3418                 list_add_tail(&work->list, &cm_id_priv->work_list);
3419         spin_unlock_irq(&cm_id_priv->lock);
3420
3421         if (ret)
3422                 cm_process_work(cm_id_priv, work);
3423         else
3424                 cm_deref_id(cm_id_priv);
3425         return 0;
3426 out:
3427         cm_deref_id(cm_id_priv);
3428         return -EINVAL;
3429 }
3430
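/*
 * Runs when a timewait entry's delayed work fires.  The remote QPN is
 * re-checked under the lock because the cm_id may already have been
 * reused for a new connection while the timewait period was pending.
 */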
3431 static int cm_timewait_handler(struct cm_work *work)
3432 {
3433         struct cm_timewait_info *timewait_info;
3434         struct cm_id_private *cm_id_priv;
3435         int ret;
3436
3437         timewait_info = (struct cm_timewait_info *)work;
3438         spin_lock_irq(&cm.lock);
3439         list_del(&timewait_info->list);
3440         spin_unlock_irq(&cm.lock);
3441
3442         cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
3443                                    timewait_info->work.remote_id);
3444         if (!cm_id_priv)
3445                 return -EINVAL;
3446
3447         spin_lock_irq(&cm_id_priv->lock);
3448         if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
3449             cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
3450                 spin_unlock_irq(&cm_id_priv->lock);
3451                 goto out;
3452         }
3453         cm_id_priv->id.state = IB_CM_IDLE;
3454         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3455         if (!ret)
3456                 list_add_tail(&work->list, &cm_id_priv->work_list);
3457         spin_unlock_irq(&cm_id_priv->lock);
3458
3459         if (ret)
3460                 cm_process_work(cm_id_priv, work);
3461         else
3462                 cm_deref_id(cm_id_priv);
3463         return 0;
3464 out:
3465         cm_deref_id(cm_id_priv);
3466         return -EINVAL;
3467 }
3468
3469 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
3470                                struct cm_id_private *cm_id_priv,
3471                                struct ib_cm_sidr_req_param *param)
3472 {
3473         cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
3474                           cm_form_tid(cm_id_priv));
3475         sidr_req_msg->request_id = cm_id_priv->id.local_id;
3476         sidr_req_msg->pkey = param->path->pkey;
3477         sidr_req_msg->service_id = param->service_id;
3478
3479         if (param->private_data && param->private_data_len)
3480                 memcpy(sidr_req_msg->private_data, param->private_data,
3481                        param->private_data_len);
3482 }
3483
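/*
 * Illustrative caller setup (not from this file; all field values are
 * examples only):
 *
 *	struct ib_cm_sidr_req_param param = {
 *		.path		= &path_rec,
 *		.service_id	= cpu_to_be64(0x1234),
 *		.timeout_ms	= 1000,
 *		.max_cm_retries	= 3,
 *	};
 *	ret = ib_send_cm_sidr_req(cm_id, &param);
 */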
3484 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
3485                         struct ib_cm_sidr_req_param *param)
3486 {
3487         struct cm_id_private *cm_id_priv;
3488         struct ib_mad_send_buf *msg;
3489         unsigned long flags;
3490         int ret;
3491
3492         if (!param->path || (param->private_data &&
3493              param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
3494                 return -EINVAL;
3495
3496         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3497         ret = cm_init_av_by_path(param->path, param->sgid_attr,
3498                                  &cm_id_priv->av,
3499                                  cm_id_priv);
3500         if (ret)
3501                 goto out;
3502
3503         cm_id->service_id = param->service_id;
3504         cm_id->service_mask = ~cpu_to_be64(0);
3505         cm_id_priv->timeout_ms = param->timeout_ms;
3506         cm_id_priv->max_cm_retries = param->max_cm_retries;
3507         ret = cm_alloc_msg(cm_id_priv, &msg);
3508         if (ret)
3509                 goto out;
3510
3511         cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
3512                            param);
3513         msg->timeout_ms = cm_id_priv->timeout_ms;
3514         msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
3515
3516         spin_lock_irqsave(&cm_id_priv->lock, flags);
3517         if (cm_id->state == IB_CM_IDLE)
3518                 ret = ib_post_send_mad(msg, NULL);
3519         else
3520                 ret = -EINVAL;
3521
3522         if (ret) {
3523                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3524                 cm_free_msg(msg);
3525                 goto out;
3526         }
3527         cm_id->state = IB_CM_SIDR_REQ_SENT;
3528         cm_id_priv->msg = msg;
3529         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3530 out:
3531         return ret;
3532 }
3533 EXPORT_SYMBOL(ib_send_cm_sidr_req);
3534
3535 static void cm_format_sidr_req_event(struct cm_work *work,
3536                                      const struct cm_id_private *rx_cm_id,
3537                                      struct ib_cm_id *listen_id)
3538 {
3539         struct cm_sidr_req_msg *sidr_req_msg;
3540         struct ib_cm_sidr_req_event_param *param;
3541
3542         sidr_req_msg = (struct cm_sidr_req_msg *)
3543                                 work->mad_recv_wc->recv_buf.mad;
3544         param = &work->cm_event.param.sidr_req_rcvd;
3545         param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
3546         param->listen_id = listen_id;
3547         param->service_id = sidr_req_msg->service_id;
3548         param->bth_pkey = cm_get_bth_pkey(work);
3549         param->port = work->port->port_num;
3550         param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
3551         work->cm_event.private_data = &sidr_req_msg->private_data;
3552 }
3553
3554 static int cm_sidr_req_handler(struct cm_work *work)
3555 {
3556         struct ib_cm_id *cm_id;
3557         struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
3558         struct cm_sidr_req_msg *sidr_req_msg;
3559         struct ib_wc *wc;
3560         int ret;
3561
3562         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3563         if (IS_ERR(cm_id))
3564                 return PTR_ERR(cm_id);
3565         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3566
3567         /* Record SGID/SLID and request ID for lookup. */
3568         sidr_req_msg = (struct cm_sidr_req_msg *)
3569                                 work->mad_recv_wc->recv_buf.mad;
3570         wc = work->mad_recv_wc->wc;
3571         cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3572         cm_id_priv->av.dgid.global.interface_id = 0;
3573         ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3574                                       work->mad_recv_wc->recv_buf.grh,
3575                                       &cm_id_priv->av);
3576         if (ret)
3577                 goto out;
3578
3579         cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3580         cm_id_priv->tid = sidr_req_msg->hdr.tid;
3581         atomic_inc(&cm_id_priv->work_count);
3582
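        /*
         * cm_insert_remote_sidr() keys on the SLID recorded above plus
         * the request ID, so a retransmitted SIDR REQ is recognized
         * and counted as a duplicate instead of creating a second
         * cm_id.
         */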
3583         spin_lock_irq(&cm.lock);
3584         cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3585         if (cur_cm_id_priv) {
3586                 spin_unlock_irq(&cm.lock);
3587                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3588                                 counter[CM_SIDR_REQ_COUNTER]);
3589                 goto out; /* Duplicate message. */
3590         }
3591         cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3592         cur_cm_id_priv = cm_find_listen(cm_id->device,
3593                                         sidr_req_msg->service_id);
3594         if (!cur_cm_id_priv) {
3595                 spin_unlock_irq(&cm.lock);
3596                 cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3597                 goto out; /* No match. */
3598         }
3599         atomic_inc(&cur_cm_id_priv->refcount);
3600         atomic_inc(&cm_id_priv->refcount);
3601         spin_unlock_irq(&cm.lock);
3602
3603         cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3604         cm_id_priv->id.context = cur_cm_id_priv->id.context;
3605         cm_id_priv->id.service_id = sidr_req_msg->service_id;
3606         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3607
3608         cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
3609         cm_process_work(cm_id_priv, work);
3610         cm_deref_id(cur_cm_id_priv);
3611         return 0;
3612 out:
3613         ib_destroy_cm_id(&cm_id_priv->id);
3614         return -EINVAL;
3615 }
3616
3617 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3618                                struct cm_id_private *cm_id_priv,
3619                                struct ib_cm_sidr_rep_param *param)
3620 {
3621         cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3622                           cm_id_priv->tid);
3623         sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3624         sidr_rep_msg->status = param->status;
3625         cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3626         sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3627         sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3628
3629         if (param->info && param->info_length)
3630                 memcpy(sidr_rep_msg->info, param->info, param->info_length);
3631
3632         if (param->private_data && param->private_data_len)
3633                 memcpy(sidr_rep_msg->private_data, param->private_data,
3634                        param->private_data_len);
3635 }
3636
3637 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3638                         struct ib_cm_sidr_rep_param *param)
3639 {
3640         struct cm_id_private *cm_id_priv;
3641         struct ib_mad_send_buf *msg;
3642         unsigned long flags;
3643         int ret;
3644
3645         if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3646             (param->private_data &&
3647              param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3648                 return -EINVAL;
3649
3650         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3651         spin_lock_irqsave(&cm_id_priv->lock, flags);
3652         if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3653                 ret = -EINVAL;
3654                 goto error;
3655         }
3656
3657         ret = cm_alloc_msg(cm_id_priv, &msg);
3658         if (ret)
3659                 goto error;
3660
3661         cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3662                            param);
3663         ret = ib_post_send_mad(msg, NULL);
3664         if (ret) {
3665                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3666                 cm_free_msg(msg);
3667                 return ret;
3668         }
3669         cm_id->state = IB_CM_IDLE;
3670         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3671
3672         spin_lock_irqsave(&cm.lock, flags);
3673         if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
3674                 rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3675                 RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
3676         }
3677         spin_unlock_irqrestore(&cm.lock, flags);
3678         return 0;
3679
3680 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3681         return ret;
3682 }
3683 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3684
3685 static void cm_format_sidr_rep_event(struct cm_work *work,
3686                                      const struct cm_id_private *cm_id_priv)
3687 {
3688         struct cm_sidr_rep_msg *sidr_rep_msg;
3689         struct ib_cm_sidr_rep_event_param *param;
3690
3691         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3692                                 work->mad_recv_wc->recv_buf.mad;
3693         param = &work->cm_event.param.sidr_rep_rcvd;
3694         param->status = sidr_rep_msg->status;
3695         param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3696         param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3697         param->info = &sidr_rep_msg->info;
3698         param->info_len = sidr_rep_msg->info_length;
3699         param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
3700         work->cm_event.private_data = &sidr_rep_msg->private_data;
3701 }
3702
3703 static int cm_sidr_rep_handler(struct cm_work *work)
3704 {
3705         struct cm_sidr_rep_msg *sidr_rep_msg;
3706         struct cm_id_private *cm_id_priv;
3707
3708         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3709                                 work->mad_recv_wc->recv_buf.mad;
3710         cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3711         if (!cm_id_priv)
3712                 return -EINVAL; /* Unmatched reply. */
3713
3714         spin_lock_irq(&cm_id_priv->lock);
3715         if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3716                 spin_unlock_irq(&cm_id_priv->lock);
3717                 goto out;
3718         }
3719         cm_id_priv->id.state = IB_CM_IDLE;
3720         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3721         spin_unlock_irq(&cm_id_priv->lock);
3722
3723         cm_format_sidr_rep_event(work, cm_id_priv);
3724         cm_process_work(cm_id_priv, work);
3725         return 0;
3726 out:
3727         cm_deref_id(cm_id_priv);
3728         return -EINVAL;
3729 }
3730
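/*
 * Handles send completions only for MADs posted with context[0]
 * pointing at the owning cm_id_priv and context[1] holding the cm_id
 * state at post time; if either no longer matches, the completion is
 * stale and the message is simply discarded.
 */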
3731 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3732                                   enum ib_wc_status wc_status)
3733 {
3734         struct cm_id_private *cm_id_priv;
3735         struct ib_cm_event cm_event;
3736         enum ib_cm_state state;
3737         int ret;
3738
3739         memset(&cm_event, 0, sizeof cm_event);
3740         cm_id_priv = msg->context[0];
3741
3742         /* Discard old sends or ones without a response. */
3743         spin_lock_irq(&cm_id_priv->lock);
3744         state = (enum ib_cm_state) (unsigned long) msg->context[1];
3745         if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3746                 goto discard;
3747
3748         pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
3749                              state, ib_wc_status_msg(wc_status));
3750         switch (state) {
3751         case IB_CM_REQ_SENT:
3752         case IB_CM_MRA_REQ_RCVD:
3753                 cm_reset_to_idle(cm_id_priv);
3754                 cm_event.event = IB_CM_REQ_ERROR;
3755                 break;
3756         case IB_CM_REP_SENT:
3757         case IB_CM_MRA_REP_RCVD:
3758                 cm_reset_to_idle(cm_id_priv);
3759                 cm_event.event = IB_CM_REP_ERROR;
3760                 break;
3761         case IB_CM_DREQ_SENT:
3762                 cm_enter_timewait(cm_id_priv);
3763                 cm_event.event = IB_CM_DREQ_ERROR;
3764                 break;
3765         case IB_CM_SIDR_REQ_SENT:
3766                 cm_id_priv->id.state = IB_CM_IDLE;
3767                 cm_event.event = IB_CM_SIDR_REQ_ERROR;
3768                 break;
3769         default:
3770                 goto discard;
3771         }
3772         spin_unlock_irq(&cm_id_priv->lock);
3773         cm_event.param.send_status = wc_status;
3774
3775         /* No other events can occur on the cm_id at this point. */
3776         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3777         cm_free_msg(msg);
3778         if (ret)
3779                 ib_destroy_cm_id(&cm_id_priv->id);
3780         return;
3781 discard:
3782         spin_unlock_irq(&cm_id_priv->lock);
3783         cm_free_msg(msg);
3784 }
3785
3786 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3787                             struct ib_mad_send_wc *mad_send_wc)
3788 {
3789         struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3790         struct cm_port *port;
3791         u16 attr_index;
3792
3793         port = mad_agent->context;
3794         attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3795                                   msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3796
3797         /*
3798          * If the send was in response to a received message (context[0] is not
3799          * set to a cm_id), and is not a REJ, then it is a send that was
3800          * manually retried.
3801          */
3802         if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3803                 msg->retries = 1;
3804
3805         atomic_long_add(1 + msg->retries,
3806                         &port->counter_group[CM_XMIT].counter[attr_index]);
3807         if (msg->retries)
3808                 atomic_long_add(msg->retries,
3809                                 &port->counter_group[CM_XMIT_RETRIES].
3810                                 counter[attr_index]);
3811
3812         switch (mad_send_wc->status) {
3813         case IB_WC_SUCCESS:
3814         case IB_WC_WR_FLUSH_ERR:
3815                 cm_free_msg(msg);
3816                 break;
3817         default:
3818                 if (msg->context[0] && msg->context[1])
3819                         cm_process_send_error(msg, mad_send_wc->status);
3820                 else
3821                         cm_free_msg(msg);
3822                 break;
3823         }
3824 }
3825
3826 static void cm_work_handler(struct work_struct *_work)
3827 {
3828         struct cm_work *work = container_of(_work, struct cm_work, work.work);
3829         int ret;
3830
3831         switch (work->cm_event.event) {
3832         case IB_CM_REQ_RECEIVED:
3833                 ret = cm_req_handler(work);
3834                 break;
3835         case IB_CM_MRA_RECEIVED:
3836                 ret = cm_mra_handler(work);
3837                 break;
3838         case IB_CM_REJ_RECEIVED:
3839                 ret = cm_rej_handler(work);
3840                 break;
3841         case IB_CM_REP_RECEIVED:
3842                 ret = cm_rep_handler(work);
3843                 break;
3844         case IB_CM_RTU_RECEIVED:
3845                 ret = cm_rtu_handler(work);
3846                 break;
3847         case IB_CM_USER_ESTABLISHED:
3848                 ret = cm_establish_handler(work);
3849                 break;
3850         case IB_CM_DREQ_RECEIVED:
3851                 ret = cm_dreq_handler(work);
3852                 break;
3853         case IB_CM_DREP_RECEIVED:
3854                 ret = cm_drep_handler(work);
3855                 break;
3856         case IB_CM_SIDR_REQ_RECEIVED:
3857                 ret = cm_sidr_req_handler(work);
3858                 break;
3859         case IB_CM_SIDR_REP_RECEIVED:
3860                 ret = cm_sidr_rep_handler(work);
3861                 break;
3862         case IB_CM_LAP_RECEIVED:
3863                 ret = cm_lap_handler(work);
3864                 break;
3865         case IB_CM_APR_RECEIVED:
3866                 ret = cm_apr_handler(work);
3867                 break;
3868         case IB_CM_TIMEWAIT_EXIT:
3869                 ret = cm_timewait_handler(work);
3870                 break;
3871         default:
3872                 pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
3873                 ret = -EINVAL;
3874                 break;
3875         }
3876         if (ret)
3877                 cm_free_work(work);
3878 }
3879
3880 static int cm_establish(struct ib_cm_id *cm_id)
3881 {
3882         struct cm_id_private *cm_id_priv;
3883         struct cm_work *work;
3884         unsigned long flags;
3885         int ret = 0;
3886         struct cm_device *cm_dev;
3887
3888         cm_dev = ib_get_client_data(cm_id->device, &cm_client);
3889         if (!cm_dev)
3890                 return -ENODEV;
3891
3892         work = kmalloc(sizeof *work, GFP_ATOMIC);
3893         if (!work)
3894                 return -ENOMEM;
3895
3896         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3897         spin_lock_irqsave(&cm_id_priv->lock, flags);
3898         switch (cm_id->state) {
3900         case IB_CM_REP_SENT:
3901         case IB_CM_MRA_REP_RCVD:
3902                 cm_id->state = IB_CM_ESTABLISHED;
3903                 break;
3904         case IB_CM_ESTABLISHED:
3905                 ret = -EISCONN;
3906                 break;
3907         default:
3908                 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
3909                          be32_to_cpu(cm_id->local_id), cm_id->state);
3910                 ret = -EINVAL;
3911                 break;
3912         }
3913         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3914
3915         if (ret) {
3916                 kfree(work);
3917                 goto out;
3918         }
3919
3920         /*
3921          * The CM worker thread may try to destroy the cm_id before it
3922          * can execute this work item.  To prevent potential deadlock,
3923          * we need to find the cm_id once we're in the context of the
3924          * worker thread, rather than holding a reference on it.
3925          */
3926         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3927         work->local_id = cm_id->local_id;
3928         work->remote_id = cm_id->remote_id;
3929         work->mad_recv_wc = NULL;
3930         work->cm_event.event = IB_CM_USER_ESTABLISHED;
3931
3932         /* Check if the device started its remove_one */
3933         spin_lock_irqsave(&cm.lock, flags);
3934         if (!cm_dev->going_down) {
3935                 queue_delayed_work(cm.wq, &work->work, 0);
3936         } else {
3937                 kfree(work);
3938                 ret = -ENODEV;
3939         }
3940         spin_unlock_irqrestore(&cm.lock, flags);
3941
3942 out:
3943         return ret;
3944 }
3945
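/*
 * Invoked via ib_cm_notify(IB_EVENT_PATH_MIG): once the hardware has
 * migrated to the alternate path, promote the alternate address
 * vector (and its send-ready state) to primary so that subsequent CM
 * MADs follow the new path.
 */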
3946 static int cm_migrate(struct ib_cm_id *cm_id)
3947 {
3948         struct cm_id_private *cm_id_priv;
3949         struct cm_av tmp_av;
3950         unsigned long flags;
3951         int tmp_send_port_not_ready;
3952         int ret = 0;
3953
3954         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3955         spin_lock_irqsave(&cm_id_priv->lock, flags);
3956         if (cm_id->state == IB_CM_ESTABLISHED &&
3957             (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3958              cm_id->lap_state == IB_CM_LAP_IDLE)) {
3959                 cm_id->lap_state = IB_CM_LAP_IDLE;
3960                 /* Swap address vector */
3961                 tmp_av = cm_id_priv->av;
3962                 cm_id_priv->av = cm_id_priv->alt_av;
3963                 cm_id_priv->alt_av = tmp_av;
3964                 /* Swap port send ready state */
3965                 tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
3966                 cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
3967                 cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
3968         } else
3969                 ret = -EINVAL;
3970         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3971
3972         return ret;
3973 }
3974
3975 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3976 {
3977         int ret;
3978
3979         switch (event) {
3980         case IB_EVENT_COMM_EST:
3981                 ret = cm_establish(cm_id);
3982                 break;
3983         case IB_EVENT_PATH_MIG:
3984                 ret = cm_migrate(cm_id);
3985                 break;
3986         default:
3987                 ret = -EINVAL;
3988         }
3989         return ret;
3990 }
3991 EXPORT_SYMBOL(ib_cm_notify);
3992
3993 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3994                             struct ib_mad_send_buf *send_buf,
3995                             struct ib_mad_recv_wc *mad_recv_wc)
3996 {
3997         struct cm_port *port = mad_agent->context;
3998         struct cm_work *work;
3999         enum ib_cm_event_type event;
4000         bool alt_path = false;
4001         u16 attr_id;
4002         int paths = 0;
4003         int going_down = 0;
4004
4005         switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
4006         case CM_REQ_ATTR_ID:
4007                 alt_path = cm_req_has_alt_path((struct cm_req_msg *)
4008                                                 mad_recv_wc->recv_buf.mad);
4009                 paths = 1 + (alt_path != 0);
4010                 event = IB_CM_REQ_RECEIVED;
4011                 break;
4012         case CM_MRA_ATTR_ID:
4013                 event = IB_CM_MRA_RECEIVED;
4014                 break;
4015         case CM_REJ_ATTR_ID:
4016                 event = IB_CM_REJ_RECEIVED;
4017                 break;
4018         case CM_REP_ATTR_ID:
4019                 event = IB_CM_REP_RECEIVED;
4020                 break;
4021         case CM_RTU_ATTR_ID:
4022                 event = IB_CM_RTU_RECEIVED;
4023                 break;
4024         case CM_DREQ_ATTR_ID:
4025                 event = IB_CM_DREQ_RECEIVED;
4026                 break;
4027         case CM_DREP_ATTR_ID:
4028                 event = IB_CM_DREP_RECEIVED;
4029                 break;
4030         case CM_SIDR_REQ_ATTR_ID:
4031                 event = IB_CM_SIDR_REQ_RECEIVED;
4032                 break;
4033         case CM_SIDR_REP_ATTR_ID:
4034                 event = IB_CM_SIDR_REP_RECEIVED;
4035                 break;
4036         case CM_LAP_ATTR_ID:
4037                 paths = 1;
4038                 event = IB_CM_LAP_RECEIVED;
4039                 break;
4040         case CM_APR_ATTR_ID:
4041                 event = IB_CM_APR_RECEIVED;
4042                 break;
4043         default:
4044                 ib_free_recv_mad(mad_recv_wc);
4045                 return;
4046         }
4047
4048         attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
4049         atomic_long_inc(&port->counter_group[CM_RECV].
4050                         counter[attr_id - CM_ATTR_ID_OFFSET]);
4051
        work = kmalloc(struct_size(work, path, paths), GFP_KERNEL);
4054         if (!work) {
4055                 ib_free_recv_mad(mad_recv_wc);
4056                 return;
4057         }
4058
4059         INIT_DELAYED_WORK(&work->work, cm_work_handler);
4060         work->cm_event.event = event;
4061         work->mad_recv_wc = mad_recv_wc;
4062         work->port = port;
4063
4064         /* Check if the device started its remove_one */
4065         spin_lock_irq(&cm.lock);
4066         if (!port->cm_dev->going_down)
4067                 queue_delayed_work(cm.wq, &work->work, 0);
4068         else
4069                 going_down = 1;
4070         spin_unlock_irq(&cm.lock);
4071
4072         if (going_down) {
4073                 kfree(work);
4074                 ib_free_recv_mad(mad_recv_wc);
4075         }
4076 }
4077
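/*
 * The three cm_init_qp_*_attr() helpers below fill in the attributes a
 * ULP needs to transition its QP to INIT, RTR, and RTS as the
 * connection protocol progresses; which fields are valid depends on
 * the current CM state, hence the per-state switches.
 */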
4078 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
4079                                 struct ib_qp_attr *qp_attr,
4080                                 int *qp_attr_mask)
4081 {
4082         unsigned long flags;
4083         int ret;
4084
4085         spin_lock_irqsave(&cm_id_priv->lock, flags);
4086         switch (cm_id_priv->id.state) {
4087         case IB_CM_REQ_SENT:
4088         case IB_CM_MRA_REQ_RCVD:
4089         case IB_CM_REQ_RCVD:
4090         case IB_CM_MRA_REQ_SENT:
4091         case IB_CM_REP_RCVD:
4092         case IB_CM_MRA_REP_SENT:
4093         case IB_CM_REP_SENT:
4094         case IB_CM_MRA_REP_RCVD:
4095         case IB_CM_ESTABLISHED:
4096                 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
4097                                 IB_QP_PKEY_INDEX | IB_QP_PORT;
4098                 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
4099                 if (cm_id_priv->responder_resources)
4100                         qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
4101                                                     IB_ACCESS_REMOTE_ATOMIC;
4102                 qp_attr->pkey_index = cm_id_priv->av.pkey_index;
4103                 qp_attr->port_num = cm_id_priv->av.port->port_num;
4104                 ret = 0;
4105                 break;
4106         default:
                pr_debug("%s: local_id %u, cm_id_priv->id.state: %d\n",
                         __func__, be32_to_cpu(cm_id_priv->id.local_id),
                         cm_id_priv->id.state);
4110                 ret = -EINVAL;
4111                 break;
4112         }
4113         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4114         return ret;
4115 }
4116
4117 static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
4118                                struct ib_qp_attr *qp_attr,
4119                                int *qp_attr_mask)
4120 {
4121         unsigned long flags;
4122         int ret;
4123
4124         spin_lock_irqsave(&cm_id_priv->lock, flags);
4125         switch (cm_id_priv->id.state) {
4126         case IB_CM_REQ_RCVD:
4127         case IB_CM_MRA_REQ_SENT:
4128         case IB_CM_REP_RCVD:
4129         case IB_CM_MRA_REP_SENT:
4130         case IB_CM_REP_SENT:
4131         case IB_CM_MRA_REP_RCVD:
4132         case IB_CM_ESTABLISHED:
4133                 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
4134                                 IB_QP_DEST_QPN | IB_QP_RQ_PSN;
4135                 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
4136                 qp_attr->path_mtu = cm_id_priv->path_mtu;
4137                 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
4138                 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
4139                 if (cm_id_priv->qp_type == IB_QPT_RC ||
4140                     cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
4141                         *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
4142                                          IB_QP_MIN_RNR_TIMER;
4143                         qp_attr->max_dest_rd_atomic =
4144                                         cm_id_priv->responder_resources;
4145                         qp_attr->min_rnr_timer = 0;
4146                 }
4147                 if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
4148                         *qp_attr_mask |= IB_QP_ALT_PATH;
4149                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
4150                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
4151                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
4152                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
4153                 }
4154                 ret = 0;
4155                 break;
4156         default:
                pr_debug("%s: local_id %u, cm_id_priv->id.state: %d\n",
                         __func__, be32_to_cpu(cm_id_priv->id.local_id),
                         cm_id_priv->id.state);
4160                 ret = -EINVAL;
4161                 break;
4162         }
4163         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4164         return ret;
4165 }
4166
4167 static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
4168                                struct ib_qp_attr *qp_attr,
4169                                int *qp_attr_mask)
4170 {
4171         unsigned long flags;
4172         int ret;
4173
4174         spin_lock_irqsave(&cm_id_priv->lock, flags);
4175         switch (cm_id_priv->id.state) {
4176         /* Allow transition to RTS before sending REP */
4177         case IB_CM_REQ_RCVD:
4178         case IB_CM_MRA_REQ_SENT:
4179
4180         case IB_CM_REP_RCVD:
4181         case IB_CM_MRA_REP_SENT:
4182         case IB_CM_REP_SENT:
4183         case IB_CM_MRA_REP_RCVD:
4184         case IB_CM_ESTABLISHED:
4185                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
4186                         *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
4187                         qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
4188                         switch (cm_id_priv->qp_type) {
4189                         case IB_QPT_RC:
4190                         case IB_QPT_XRC_INI:
4191                                 *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
4192                                                  IB_QP_MAX_QP_RD_ATOMIC;
4193                                 qp_attr->retry_cnt = cm_id_priv->retry_count;
4194                                 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
4195                                 qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
4196                                 /* fall through */
4197                         case IB_QPT_XRC_TGT:
4198                                 *qp_attr_mask |= IB_QP_TIMEOUT;
4199                                 qp_attr->timeout = cm_id_priv->av.timeout;
4200                                 break;
4201                         default:
4202                                 break;
4203                         }
4204                         if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
4205                                 *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
4206                                 qp_attr->path_mig_state = IB_MIG_REARM;
4207                         }
4208                 } else {
4209                         *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
4210                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
4211                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
4212                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
4213                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
4214                         qp_attr->path_mig_state = IB_MIG_REARM;
4215                 }
4216                 ret = 0;
4217                 break;
4218         default:
                pr_debug("%s: local_id %u, cm_id_priv->id.state: %d\n",
                         __func__, be32_to_cpu(cm_id_priv->id.local_id),
                         cm_id_priv->id.state);
4222                 ret = -EINVAL;
4223                 break;
4224         }
4225         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
4226         return ret;
4227 }
4228
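/*
 * Typical ULP usage of ib_cm_init_qp_attr() -- a hedged sketch in
 * which qp and cm_id belong to the caller and error handling is
 * elided:
 *
 *	struct ib_qp_attr qp_attr;
 *	int qp_attr_mask;
 *
 *	qp_attr.qp_state = IB_QPS_INIT;
 *	if (!ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask))
 *		ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 *
 * ... and then again with IB_QPS_RTR and IB_QPS_RTS.
 */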
4229 int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
4230                        struct ib_qp_attr *qp_attr,
4231                        int *qp_attr_mask)
4232 {
4233         struct cm_id_private *cm_id_priv;
4234         int ret;
4235
4236         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
4237         switch (qp_attr->qp_state) {
4238         case IB_QPS_INIT:
4239                 ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
4240                 break;
4241         case IB_QPS_RTR:
4242                 ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
4243                 break;
4244         case IB_QPS_RTS:
4245                 ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
4246                 break;
4247         default:
4248                 ret = -EINVAL;
4249                 break;
4250         }
4251         return ret;
4252 }
4253 EXPORT_SYMBOL(ib_cm_init_qp_attr);
4254
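/*
 * sysfs show hook for the per-port CM counters: each counter-group
 * kobject exposes one read-only file per CM attribute.
 */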
4255 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
4256                                char *buf)
4257 {
4258         struct cm_counter_group *group;
4259         struct cm_counter_attribute *cm_attr;
4260
4261         group = container_of(obj, struct cm_counter_group, obj);
4262         cm_attr = container_of(attr, struct cm_counter_attribute, attr);
4263
4264         return sprintf(buf, "%ld\n",
4265                        atomic_long_read(&group->counter[cm_attr->index]));
4266 }
4267
4268 static const struct sysfs_ops cm_counter_ops = {
4269         .show = cm_show_counter
4270 };
4271
4272 static struct kobj_type cm_counter_obj_type = {
4273         .sysfs_ops = &cm_counter_ops,
4274         .default_attrs = cm_counter_default_attrs
4275 };
4276
4277 static void cm_release_port_obj(struct kobject *obj)
4278 {
4279         struct cm_port *cm_port;
4280
4281         cm_port = container_of(obj, struct cm_port, port_obj);
4282         kfree(cm_port);
4283 }
4284
4285 static struct kobj_type cm_port_obj_type = {
4286         .release = cm_release_port_obj
4287 };
4288
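/* Device nodes appear as /dev/infiniband/<name>, world read/write */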
4289 static char *cm_devnode(struct device *dev, umode_t *mode)
4290 {
4291         if (mode)
4292                 *mode = 0666;
4293         return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
4294 }
4295
4296 struct class cm_class = {
4297         .owner   = THIS_MODULE,
4298         .name    = "infiniband_cm",
4299         .devnode = cm_devnode,
4300 };
4301 EXPORT_SYMBOL(cm_class);
4302
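/*
 * Build the per-port sysfs hierarchy, visible to userspace as
 * /sys/class/infiniband_cm/<device>/<port>/<counter group>/<counter>.
 * On failure, kobjects added so far are released in reverse order.
 */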
4303 static int cm_create_port_fs(struct cm_port *port)
4304 {
4305         int i, ret;
4306
4307         ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
4308                                    &port->cm_dev->device->kobj,
4309                                    "%d", port->port_num);
4310         if (ret) {
4311                 kfree(port);
4312                 return ret;
4313         }
4314
4315         for (i = 0; i < CM_COUNTER_GROUPS; i++) {
4316                 ret = kobject_init_and_add(&port->counter_group[i].obj,
4317                                            &cm_counter_obj_type,
4318                                            &port->port_obj,
4319                                            "%s", counter_group_names[i]);
4320                 if (ret)
4321                         goto error;
4322         }
4323
4324         return 0;
4325
4326 error:
4327         while (i--)
4328                 kobject_put(&port->counter_group[i].obj);
4329         kobject_put(&port->port_obj);
        return ret;
}
4333
4334 static void cm_remove_port_fs(struct cm_port *port)
4335 {
4336         int i;
4337
4338         for (i = 0; i < CM_COUNTER_GROUPS; i++)
4339                 kobject_put(&port->counter_group[i].obj);
4340
4341         kobject_put(&port->port_obj);
4342 }
4343
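/*
 * Client callback invoked once per IB device.  For every CM-capable
 * port it registers a GSI MAD agent for the CM management class and
 * advertises IB_PORT_CM_SUP in the port capability mask; a device with
 * no CM-capable port is released without being added.
 */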
4344 static void cm_add_one(struct ib_device *ib_device)
4345 {
4346         struct cm_device *cm_dev;
4347         struct cm_port *port;
4348         struct ib_mad_reg_req reg_req = {
4349                 .mgmt_class = IB_MGMT_CLASS_CM,
4350                 .mgmt_class_version = IB_CM_CLASS_VERSION,
4351         };
4352         struct ib_port_modify port_modify = {
4353                 .set_port_cap_mask = IB_PORT_CM_SUP
4354         };
4355         unsigned long flags;
4356         int ret;
4357         int count = 0;
4358         u8 i;
4359
4360         cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
4361                          GFP_KERNEL);
4362         if (!cm_dev)
4363                 return;
4364
4365         cm_dev->ib_device = ib_device;
4366         cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
4367         cm_dev->going_down = 0;
4368         cm_dev->device = device_create(&cm_class, &ib_device->dev,
4369                                        MKDEV(0, 0), NULL,
4370                                        "%s", ib_device->name);
4371         if (IS_ERR(cm_dev->device)) {
4372                 kfree(cm_dev);
4373                 return;
4374         }
4375
4376         set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
4377         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4378                 if (!rdma_cap_ib_cm(ib_device, i))
4379                         continue;
4380
4381                 port = kzalloc(sizeof *port, GFP_KERNEL);
4382                 if (!port)
4383                         goto error1;
4384
4385                 cm_dev->port[i-1] = port;
4386                 port->cm_dev = cm_dev;
4387                 port->port_num = i;
4388
4389                 INIT_LIST_HEAD(&port->cm_priv_prim_list);
4390                 INIT_LIST_HEAD(&port->cm_priv_altr_list);
4391
4392                 ret = cm_create_port_fs(port);
4393                 if (ret)
4394                         goto error1;
4395
4396                 port->mad_agent = ib_register_mad_agent(ib_device, i,
4397                                                         IB_QPT_GSI,
4398                                                         &reg_req,
4399                                                         0,
4400                                                         cm_send_handler,
4401                                                         cm_recv_handler,
4402                                                         port,
4403                                                         0);
4404                 if (IS_ERR(port->mad_agent))
4405                         goto error2;
4406
4407                 ret = ib_modify_port(ib_device, i, 0, &port_modify);
4408                 if (ret)
4409                         goto error3;
4410
4411                 count++;
4412         }
4413
4414         if (!count)
4415                 goto free;
4416
4417         ib_set_client_data(ib_device, &cm_client, cm_dev);
4418
4419         write_lock_irqsave(&cm.device_lock, flags);
4420         list_add_tail(&cm_dev->list, &cm.device_list);
4421         write_unlock_irqrestore(&cm.device_lock, flags);
4422         return;
4423
4424 error3:
4425         ib_unregister_mad_agent(port->mad_agent);
4426 error2:
4427         cm_remove_port_fs(port);
4428 error1:
4429         port_modify.set_port_cap_mask = 0;
4430         port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
4431         while (--i) {
4432                 if (!rdma_cap_ib_cm(ib_device, i))
4433                         continue;
4434
4435                 port = cm_dev->port[i-1];
4436                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4437                 ib_unregister_mad_agent(port->mad_agent);
4438                 cm_remove_port_fs(port);
4439         }
4440 free:
4441         device_unregister(cm_dev->device);
4442         kfree(cm_dev);
4443 }
4444
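/*
 * Tear down in roughly the reverse order of cm_add_one(): unlink the
 * device, set going_down so no new work is queued, then per port clear
 * the capability mask, flush outstanding work, and unregister the MAD
 * agent before removing the sysfs objects.
 */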
4445 static void cm_remove_one(struct ib_device *ib_device, void *client_data)
4446 {
4447         struct cm_device *cm_dev = client_data;
4448         struct cm_port *port;
4449         struct cm_id_private *cm_id_priv;
4450         struct ib_mad_agent *cur_mad_agent;
4451         struct ib_port_modify port_modify = {
4452                 .clr_port_cap_mask = IB_PORT_CM_SUP
4453         };
4454         unsigned long flags;
4455         int i;
4456
4457         if (!cm_dev)
4458                 return;
4459
4460         write_lock_irqsave(&cm.device_lock, flags);
4461         list_del(&cm_dev->list);
4462         write_unlock_irqrestore(&cm.device_lock, flags);
4463
        spin_lock_irq(&cm.lock);
4465         cm_dev->going_down = 1;
4466         spin_unlock_irq(&cm.lock);
4467
4468         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
4469                 if (!rdma_cap_ib_cm(ib_device, i))
4470                         continue;
4471
4472                 port = cm_dev->port[i-1];
4473                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
                /* Mark all cm_ids using this port as not ready to send */
4475                 spin_lock_irq(&cm.lock);
4476                 list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
4477                         cm_id_priv->altr_send_port_not_ready = 1;
4478                 list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
4479                         cm_id_priv->prim_send_port_not_ready = 1;
4480                 spin_unlock_irq(&cm.lock);
                /*
                 * Flush the workqueue after going_down is set; this
                 * guarantees that the receive handler queues no new
                 * work, after which it is safe to unregister the MAD
                 * agent.
                 */
4486                 flush_workqueue(cm.wq);
4487                 spin_lock_irq(&cm.state_lock);
4488                 cur_mad_agent = port->mad_agent;
4489                 port->mad_agent = NULL;
4490                 spin_unlock_irq(&cm.state_lock);
4491                 ib_unregister_mad_agent(cur_mad_agent);
4492                 cm_remove_port_fs(port);
4493         }
4494
4495         device_unregister(cm_dev->device);
4496         kfree(cm_dev);
4497 }
4498
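/*
 * Module init: register the sysfs class, create the CM workqueue, then
 * register as an IB client; the error labels unwind in reverse order.
 */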
4499 static int __init ib_cm_init(void)
4500 {
4501         int ret;
4502
4503         memset(&cm, 0, sizeof cm);
4504         INIT_LIST_HEAD(&cm.device_list);
4505         rwlock_init(&cm.device_lock);
4506         spin_lock_init(&cm.lock);
4507         spin_lock_init(&cm.state_lock);
4508         cm.listen_service_table = RB_ROOT;
4509         cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
4510         cm.remote_id_table = RB_ROOT;
4511         cm.remote_qp_table = RB_ROOT;
4512         cm.remote_sidr_table = RB_ROOT;
4513         idr_init(&cm.local_id_table);
4514         get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
4515         INIT_LIST_HEAD(&cm.timewait_list);
4516
4517         ret = class_register(&cm_class);
        if (ret)
                goto error1;
4522
4523         cm.wq = alloc_workqueue("ib_cm", 0, 1);
4524         if (!cm.wq) {
4525                 ret = -ENOMEM;
4526                 goto error2;
4527         }
4528
4529         ret = ib_register_client(&cm_client);
4530         if (ret)
4531                 goto error3;
4532
4533         return 0;
4534 error3:
4535         destroy_workqueue(cm.wq);
4536 error2:
4537         class_unregister(&cm_class);
4538 error1:
4539         idr_destroy(&cm.local_id_table);
4540         return ret;
4541 }
4542
4543 static void __exit ib_cm_cleanup(void)
4544 {
4545         struct cm_timewait_info *timewait_info, *tmp;
4546
4547         spin_lock_irq(&cm.lock);
4548         list_for_each_entry(timewait_info, &cm.timewait_list, list)
4549                 cancel_delayed_work(&timewait_info->work.work);
4550         spin_unlock_irq(&cm.lock);
4551
4552         ib_unregister_client(&cm_client);
4553         destroy_workqueue(cm.wq);
4554
4555         list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
4556                 list_del(&timewait_info->list);
4557                 kfree(timewait_info);
4558         }
4559
4560         class_unregister(&cm_class);
4561         idr_destroy(&cm.local_id_table);
4562 }
4563
4564 module_init(ib_cm_init);
4565 module_exit(ib_cm_cleanup);
4566