Merge tag 'net-6.16-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
[linux-block.git] / net / smc / smc_core.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
0cfdd8f9
UB
2/*
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
4 *
5 * Basic Transport Functions exploiting Infiniband API
6 *
7 * Copyright IBM Corp. 2016
8 *
9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
10 */
11
12#include <linux/socket.h>
13#include <linux/if_vlan.h>
14#include <linux/random.h>
15#include <linux/workqueue.h>
6dabd405 16#include <linux/wait.h>
a33a803c 17#include <linux/reboot.h>
92f3cb0e 18#include <linux/mutex.h>
099b990b
GG
19#include <linux/list.h>
20#include <linux/smc.h>
0cfdd8f9
UB
21#include <net/tcp.h>
22#include <net/sock.h>
23#include <rdma/ib_verbs.h>
ddb457c6 24#include <rdma/ib_cache.h>
0cfdd8f9
UB
25
26#include "smc.h"
27#include "smc_clc.h"
28#include "smc_core.h"
29#include "smc_ib.h"
f38ba179 30#include "smc_wr.h"
9bf9abea 31#include "smc_llc.h"
5f08318f 32#include "smc_cdc.h"
b38d7324 33#include "smc_close.h"
c6ba7c9b 34#include "smc_ism.h"
099b990b 35#include "smc_netlink.h"
e0e4b8fa 36#include "smc_stats.h"
a3a0e81b 37#include "smc_tracepoint.h"
0cfdd8f9 38
5bc11ddb
UB
39#define SMC_LGR_NUM_INCR 256
40#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
7f58a1ad 41#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
0cfdd8f9 42
a3db10ef 43struct smc_lgr_list smc_lgr_list = { /* established link groups */
9fda3510
HW
44 .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
45 .list = LIST_HEAD_INIT(smc_lgr_list.list),
46 .num = 0,
47};
9bf9abea 48
29115cef 49static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
6dabd405
UB
50static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
51
6511aad3
HW
52static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
53 struct smc_buf_desc *buf_desc);
5f78fe96 54static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
a6920d1d 55
541afa10 56static void smc_link_down_work(struct work_struct *work);
1f90a05d 57
a0a62ee1
UB
58/* return head of link group list and its lock for a given link group */
59static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
60 spinlock_t **lgr_lock)
61{
62 if (lgr->is_smcd) {
63 *lgr_lock = &lgr->smcd->lgr_lock;
64 return &lgr->smcd->lgr_list;
65 }
66
67 *lgr_lock = &smc_lgr_list.lock;
68 return &smc_lgr_list.list;
69}
70
ddc99286
GG
71static void smc_ibdev_cnt_inc(struct smc_link *lnk)
72{
73 atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
74}
75
76static void smc_ibdev_cnt_dec(struct smc_link *lnk)
77{
78 atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
79}
80
97cdbc42
KG
81static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
82{
83 /* client link group creation always follows the server link group
84 * creation. For client use a somewhat higher removal delay time,
85 * otherwise there is a risk of out-of-sync link groups.
86 */
f9aab6f2 87 if (!lgr->freeing) {
8e316b9e
UB
88 mod_delayed_work(system_wq, &lgr->free_work,
89 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
90 SMC_LGR_FREE_DELAY_CLNT :
91 SMC_LGR_FREE_DELAY_SERV);
92 }
97cdbc42
KG
93}
94
0cfdd8f9
UB
95/* Register connection's alert token in our lookup structure.
96 * To use rbtrees we have to implement our own insert core.
97 * Requires @conns_lock
98 * @smc connection to register
99 * Returns 0 on success, != otherwise.
100 */
101static void smc_lgr_add_alert_token(struct smc_connection *conn)
102{
103 struct rb_node **link, *parent = NULL;
104 u32 token = conn->alert_token_local;
105
106 link = &conn->lgr->conns_all.rb_node;
107 while (*link) {
108 struct smc_connection *cur = rb_entry(*link,
109 struct smc_connection, alert_node);
110
111 parent = *link;
112 if (cur->alert_token_local > token)
113 link = &parent->rb_left;
114 else
115 link = &parent->rb_right;
116 }
117 /* Put the new node there */
118 rb_link_node(&conn->alert_node, parent, link);
119 rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
120}
121
56bc3b20
KG
122/* assign an SMC-R link to the connection */
123static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
124{
125 enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
126 SMC_LNK_ACTIVE;
127 int i, j;
128
129 /* do link balancing */
35112271 130 conn->lnk = NULL; /* reset conn->lnk first */
56bc3b20
KG
131 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
132 struct smc_link *lnk = &conn->lgr->lnk[i];
133
ad6c111b 134 if (lnk->state != expected || lnk->link_is_asym)
56bc3b20
KG
135 continue;
136 if (conn->lgr->role == SMC_CLNT) {
137 conn->lnk = lnk; /* temporary, SMC server assigns link*/
138 break;
139 }
140 if (conn->lgr->conns_num % 2) {
141 for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
142 struct smc_link *lnk2;
143
144 lnk2 = &conn->lgr->lnk[j];
ad6c111b
KG
145 if (lnk2->state == expected &&
146 !lnk2->link_is_asym) {
56bc3b20
KG
147 conn->lnk = lnk2;
148 break;
149 }
150 }
151 }
152 if (!conn->lnk)
153 conn->lnk = lnk;
154 break;
155 }
156 if (!conn->lnk)
157 return SMC_CLC_DECL_NOACTLINK;
07d51580 158 atomic_inc(&conn->lnk->conn_cnt);
56bc3b20
KG
159 return 0;
160}
161
0cfdd8f9
UB
162/* Register connection in link group by assigning an alert token
163 * registered in a search tree.
164 * Requires @conns_lock
165 * Note that '0' is a reserved value and not assigned.
166 */
56bc3b20 167static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
0cfdd8f9
UB
168{
169 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
170 static atomic_t nexttoken = ATOMIC_INIT(0);
56bc3b20 171 int rc;
0cfdd8f9 172
56bc3b20
KG
173 if (!conn->lgr->is_smcd) {
174 rc = smcr_lgr_conn_assign_link(conn, first);
36595d8a
WG
175 if (rc) {
176 conn->lgr = NULL;
56bc3b20 177 return rc;
36595d8a 178 }
56bc3b20 179 }
0cfdd8f9
UB
180 /* find a new alert_token_local value not yet used by some connection
181 * in this link group
182 */
183 sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
184 while (!conn->alert_token_local) {
185 conn->alert_token_local = atomic_inc_return(&nexttoken);
186 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
187 conn->alert_token_local = 0;
188 }
189 smc_lgr_add_alert_token(conn);
190 conn->lgr->conns_num++;
b9247544 191 return 0;
0cfdd8f9
UB
192}
193
194/* Unregister connection and reset the alert token of the given connection<
195 */
196static void __smc_lgr_unregister_conn(struct smc_connection *conn)
197{
198 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
199 struct smc_link_group *lgr = conn->lgr;
200
201 rb_erase(&conn->alert_node, &lgr->conns_all);
07d51580
GG
202 if (conn->lnk)
203 atomic_dec(&conn->lnk->conn_cnt);
0cfdd8f9
UB
204 lgr->conns_num--;
205 conn->alert_token_local = 0;
0cfdd8f9
UB
206 sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
207}
208
fb692ec4 209/* Unregister connection from lgr
0cfdd8f9
UB
210 */
211static void smc_lgr_unregister_conn(struct smc_connection *conn)
212{
213 struct smc_link_group *lgr = conn->lgr;
0cfdd8f9 214
ea89c6c0 215 if (!smc_conn_lgr_valid(conn))
77f838ac 216 return;
0cfdd8f9
UB
217 write_lock_bh(&lgr->conns_lock);
218 if (conn->alert_token_local) {
0cfdd8f9
UB
219 __smc_lgr_unregister_conn(conn);
220 }
221 write_unlock_bh(&lgr->conns_lock);
0cfdd8f9
UB
222}
223
d386d59b
WG
224static void smc_lgr_buf_list_add(struct smc_link_group *lgr,
225 bool is_rmb,
226 struct list_head *buf_list,
227 struct smc_buf_desc *buf_desc)
228{
229 list_add(&buf_desc->list, buf_list);
230 if (is_rmb) {
231 lgr->alloc_rmbs += buf_desc->len;
232 lgr->alloc_rmbs +=
233 lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
234 } else {
235 lgr->alloc_sndbufs += buf_desc->len;
236 }
237}
238
239static void smc_lgr_buf_list_del(struct smc_link_group *lgr,
240 bool is_rmb,
241 struct smc_buf_desc *buf_desc)
242{
243 list_del(&buf_desc->list);
244 if (is_rmb) {
245 lgr->alloc_rmbs -= buf_desc->len;
246 lgr->alloc_rmbs -=
247 lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
248 } else {
249 lgr->alloc_sndbufs -= buf_desc->len;
250 }
251}
252
099b990b
GG
253int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
254{
255 struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
256 char hostname[SMC_MAX_HOSTNAME_LEN + 1];
257 char smc_seid[SMC_MAX_EID_LEN + 1];
099b990b
GG
258 struct nlattr *attrs;
259 u8 *seid = NULL;
260 u8 *host = NULL;
261 void *nlh;
262
263 nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
264 &smc_gen_nl_family, NLM_F_MULTI,
265 SMC_NETLINK_GET_SYS_INFO);
266 if (!nlh)
267 goto errmsg;
268 if (cb_ctx->pos[0])
269 goto errout;
270 attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
271 if (!attrs)
272 goto errout;
273 if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
274 goto errattr;
275 if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE))
276 goto errattr;
277 if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
278 goto errattr;
b0539f5e
KG
279 if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
280 goto errattr;
099b990b
GG
281 smc_clc_get_hostname(&host);
282 if (host) {
25fe2c9c
JK
283 memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
284 hostname[SMC_MAX_HOSTNAME_LEN] = 0;
099b990b
GG
285 if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
286 goto errattr;
287 }
11a26c59
KG
288 if (smc_ism_is_v2_capable()) {
289 smc_ism_get_system_eid(&seid);
8a446536
GG
290 memcpy(smc_seid, seid, SMC_MAX_EID_LEN);
291 smc_seid[SMC_MAX_EID_LEN] = 0;
099b990b
GG
292 if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
293 goto errattr;
294 }
295 nla_nest_end(skb, attrs);
296 genlmsg_end(skb, nlh);
297 cb_ctx->pos[0] = 1;
298 return skb->len;
299
300errattr:
301 nla_nest_cancel(skb, attrs);
302errout:
303 genlmsg_cancel(skb, nlh);
304errmsg:
305 return skb->len;
306}
307
b0539f5e
KG
308/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
309static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
310 struct sk_buff *skb,
311 struct netlink_callback *cb,
312 struct nlattr *v2_attrs)
313{
314 char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
315 char smc_eid[SMC_MAX_EID_LEN + 1];
316
317 if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
318 goto errv2attr;
319 if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
320 goto errv2attr;
321 if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
322 goto errv2attr;
323 memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
324 smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
325 if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
326 goto errv2attr;
327 memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
328 smc_eid[SMC_MAX_EID_LEN] = 0;
329 if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
330 goto errv2attr;
331
332 nla_nest_end(skb, v2_attrs);
333 return 0;
334
335errv2attr:
336 nla_nest_cancel(skb, v2_attrs);
337 return -EMSGSIZE;
338}
339
340static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
341 struct sk_buff *skb,
342 struct netlink_callback *cb)
343{
344 struct nlattr *v2_attrs;
345
346 v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
347 if (!v2_attrs)
348 goto errattr;
349 if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
350 goto errv2attr;
bbed596c
GW
351 if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_CONNS, lgr->max_conns))
352 goto errv2attr;
353 if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_LINKS, lgr->max_links))
354 goto errv2attr;
b0539f5e
KG
355
356 nla_nest_end(skb, v2_attrs);
357 return 0;
358
359errv2attr:
360 nla_nest_cancel(skb, v2_attrs);
361errattr:
362 return -EMSGSIZE;
363}
364
e9b8c845
GG
365static int smc_nl_fill_lgr(struct smc_link_group *lgr,
366 struct sk_buff *skb,
367 struct netlink_callback *cb)
368{
369 char smc_target[SMC_MAX_PNETID_LEN + 1];
b0539f5e 370 struct nlattr *attrs, *v2_attrs;
e9b8c845
GG
371
372 attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
373 if (!attrs)
374 goto errout;
375
376 if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id)))
377 goto errattr;
378 if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num))
379 goto errattr;
380 if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role))
381 goto errattr;
382 if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
383 goto errattr;
ddefb2d2
WG
384 if (nla_put_u8(skb, SMC_NLA_LGR_R_BUF_TYPE, lgr->buf_type))
385 goto errattr;
e9b8c845
GG
386 if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
387 goto errattr;
79d39fc5
TL
388 if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE,
389 lgr->net->net_cookie, SMC_NLA_LGR_R_PAD))
390 goto errattr;
8a446536
GG
391 memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN);
392 smc_target[SMC_MAX_PNETID_LEN] = 0;
e9b8c845
GG
393 if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
394 goto errattr;
d386d59b
WG
395 if (nla_put_uint(skb, SMC_NLA_LGR_R_SNDBUF_ALLOC, lgr->alloc_sndbufs))
396 goto errattr;
397 if (nla_put_uint(skb, SMC_NLA_LGR_R_RMB_ALLOC, lgr->alloc_rmbs))
398 goto errattr;
b0539f5e
KG
399 if (lgr->smc_version > SMC_V1) {
400 v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
401 if (!v2_attrs)
402 goto errattr;
403 if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
404 goto errattr;
405 if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
406 goto errattr;
407 }
e9b8c845
GG
408
409 nla_nest_end(skb, attrs);
410 return 0;
411errattr:
412 nla_nest_cancel(skb, attrs);
413errout:
414 return -EMSGSIZE;
415}
416
5a7e09d5
GG
417static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
418 struct smc_link *link,
419 struct sk_buff *skb,
420 struct netlink_callback *cb)
421{
8a446536 422 char smc_ibname[IB_DEVICE_NAME_MAX];
5a7e09d5
GG
423 u8 smc_gid_target[41];
424 struct nlattr *attrs;
425 u32 link_uid = 0;
426 void *nlh;
427
428 nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
429 &smc_gen_nl_family, NLM_F_MULTI,
430 SMC_NETLINK_GET_LINK_SMCR);
431 if (!nlh)
432 goto errmsg;
433
434 attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR);
435 if (!attrs)
436 goto errout;
437
438 if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id))
439 goto errattr;
440 if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state))
441 goto errattr;
442 if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT,
443 atomic_read(&link->conn_cnt)))
444 goto errattr;
445 if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport))
446 goto errattr;
447 if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx))
448 goto errattr;
449 snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname);
450 if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname))
451 goto errattr;
452 memcpy(&link_uid, link->link_uid, sizeof(link_uid));
453 if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid))
454 goto errattr;
455 memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid));
456 if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid))
457 goto errattr;
458 memset(smc_gid_target, 0, sizeof(smc_gid_target));
459 smc_gid_be16_convert(smc_gid_target, link->gid);
460 if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target))
461 goto errattr;
462 memset(smc_gid_target, 0, sizeof(smc_gid_target));
463 smc_gid_be16_convert(smc_gid_target, link->peer_gid);
464 if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target))
465 goto errattr;
466
467 nla_nest_end(skb, attrs);
468 genlmsg_end(skb, nlh);
469 return 0;
470errattr:
471 nla_nest_cancel(skb, attrs);
472errout:
473 genlmsg_cancel(skb, nlh);
474errmsg:
475 return -EMSGSIZE;
476}
477
e9b8c845
GG
478static int smc_nl_handle_lgr(struct smc_link_group *lgr,
479 struct sk_buff *skb,
5a7e09d5
GG
480 struct netlink_callback *cb,
481 bool list_links)
e9b8c845
GG
482{
483 void *nlh;
5a7e09d5 484 int i;
e9b8c845
GG
485
486 nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
487 &smc_gen_nl_family, NLM_F_MULTI,
488 SMC_NETLINK_GET_LGR_SMCR);
489 if (!nlh)
490 goto errmsg;
491 if (smc_nl_fill_lgr(lgr, skb, cb))
492 goto errout;
493
494 genlmsg_end(skb, nlh);
5a7e09d5
GG
495 if (!list_links)
496 goto out;
497 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
498 if (!smc_link_usable(&lgr->lnk[i]))
499 continue;
500 if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb))
501 goto errout;
502 }
503out:
e9b8c845
GG
504 return 0;
505
506errout:
507 genlmsg_cancel(skb, nlh);
508errmsg:
509 return -EMSGSIZE;
510}
511
512static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
513 struct sk_buff *skb,
5a7e09d5
GG
514 struct netlink_callback *cb,
515 bool list_links)
e9b8c845
GG
516{
517 struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
518 struct smc_link_group *lgr;
519 int snum = cb_ctx->pos[0];
520 int num = 0;
521
522 spin_lock_bh(&smc_lgr->lock);
523 list_for_each_entry(lgr, &smc_lgr->list, list) {
524 if (num < snum)
525 goto next;
5a7e09d5 526 if (smc_nl_handle_lgr(lgr, skb, cb, list_links))
e9b8c845
GG
527 goto errout;
528next:
529 num++;
530 }
531errout:
532 spin_unlock_bh(&smc_lgr->lock);
533 cb_ctx->pos[0] = num;
534}
535
8f9dde4b
GG
536static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
537 struct sk_buff *skb,
538 struct netlink_callback *cb)
539{
8f9dde4b 540 char smc_pnet[SMC_MAX_PNETID_LEN + 1];
9de4df7b 541 struct smcd_dev *smcd = lgr->smcd;
b40584d1 542 struct smcd_gid smcd_gid;
8f9dde4b
GG
543 struct nlattr *attrs;
544 void *nlh;
545
546 nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
547 &smc_gen_nl_family, NLM_F_MULTI,
548 SMC_NETLINK_GET_LGR_SMCD);
549 if (!nlh)
550 goto errmsg;
551
552 attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD);
553 if (!attrs)
554 goto errout;
555
556 if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
557 goto errattr;
b40584d1 558 smcd->ops->get_local_gid(smcd, &smcd_gid);
9de4df7b 559 if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID,
b40584d1 560 smcd_gid.gid, SMC_NLA_LGR_D_PAD))
8f9dde4b 561 goto errattr;
01fd1617
WG
562 if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_EXT_GID,
563 smcd_gid.gid_ext, SMC_NLA_LGR_D_PAD))
564 goto errattr;
b40584d1 565 if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid.gid,
8f9dde4b
GG
566 SMC_NLA_LGR_D_PAD))
567 goto errattr;
01fd1617
WG
568 if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_EXT_GID,
569 lgr->peer_gid.gid_ext, SMC_NLA_LGR_D_PAD))
570 goto errattr;
8f9dde4b
GG
571 if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
572 goto errattr;
573 if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
574 goto errattr;
575 if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
576 goto errattr;
d386d59b
WG
577 if (nla_put_uint(skb, SMC_NLA_LGR_D_SNDBUF_ALLOC, lgr->alloc_sndbufs))
578 goto errattr;
579 if (nla_put_uint(skb, SMC_NLA_LGR_D_DMB_ALLOC, lgr->alloc_rmbs))
580 goto errattr;
8a446536
GG
581 memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
582 smc_pnet[SMC_MAX_PNETID_LEN] = 0;
8f9dde4b
GG
583 if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
584 goto errattr;
b0539f5e
KG
585 if (lgr->smc_version > SMC_V1) {
586 struct nlattr *v2_attrs;
8f9dde4b 587
b0539f5e
KG
588 v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
589 if (!v2_attrs)
590 goto errattr;
591 if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
592 goto errattr;
593 }
8f9dde4b
GG
594 nla_nest_end(skb, attrs);
595 genlmsg_end(skb, nlh);
596 return 0;
597
8f9dde4b
GG
598errattr:
599 nla_nest_cancel(skb, attrs);
600errout:
601 genlmsg_cancel(skb, nlh);
602errmsg:
603 return -EMSGSIZE;
604}
605
606static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev,
607 struct sk_buff *skb,
608 struct netlink_callback *cb)
609{
610 struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
611 struct smc_link_group *lgr;
612 int snum = cb_ctx->pos[1];
613 int rc = 0, num = 0;
614
615 spin_lock_bh(&dev->lgr_lock);
616 list_for_each_entry(lgr, &dev->lgr_list, list) {
617 if (!lgr->is_smcd)
618 continue;
619 if (num < snum)
620 goto next;
621 rc = smc_nl_fill_smcd_lgr(lgr, skb, cb);
622 if (rc)
623 goto errout;
624next:
625 num++;
626 }
627errout:
628 spin_unlock_bh(&dev->lgr_lock);
629 cb_ctx->pos[1] = num;
630 return rc;
631}
632
633static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list,
634 struct sk_buff *skb,
635 struct netlink_callback *cb)
636{
637 struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
638 struct smcd_dev *smcd_dev;
639 int snum = cb_ctx->pos[0];
640 int rc = 0, num = 0;
641
642 mutex_lock(&dev_list->mutex);
643 list_for_each_entry(smcd_dev, &dev_list->list, list) {
644 if (list_empty(&smcd_dev->lgr_list))
645 continue;
646 if (num < snum)
647 goto next;
648 rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb);
649 if (rc)
650 goto errout;
651next:
652 num++;
653 }
654errout:
655 mutex_unlock(&dev_list->mutex);
656 cb_ctx->pos[0] = num;
657 return rc;
658}
659
e9b8c845
GG
660int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
661{
5a7e09d5
GG
662 bool list_links = false;
663
664 smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
665 return skb->len;
666}
667
668int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
669{
670 bool list_links = true;
671
672 smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
e9b8c845
GG
673 return skb->len;
674}
675
8f9dde4b
GG
676int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
677{
678 smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb);
679 return skb->len;
680}
681
36595d8a 682void smc_lgr_cleanup_early(struct smc_link_group *lgr)
51e3dfa8 683{
9ec6bf19 684 spinlock_t *lgr_lock;
51e3dfa8
UB
685
686 if (!lgr)
687 return;
688
789b6cc2 689 smc_lgr_list_head(lgr, &lgr_lock);
9ec6bf19
KG
690 spin_lock_bh(lgr_lock);
691 /* do not use this link group for new connections */
789b6cc2
DL
692 if (!list_empty(&lgr->list))
693 list_del_init(&lgr->list);
9ec6bf19 694 spin_unlock_bh(lgr_lock);
f9aab6f2 695 __smc_lgr_terminate(lgr, true);
51e3dfa8
UB
696}
697
a52bcc91
KG
698static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
699{
700 int i;
701
702 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
703 struct smc_link *lnk = &lgr->lnk[i];
704
90cee52f 705 if (smc_link_sendable(lnk))
a52bcc91
KG
706 lnk->state = SMC_LNK_INACTIVE;
707 }
6778a6be
KG
708 wake_up_all(&lgr->llc_msg_waiter);
709 wake_up_all(&lgr->llc_flow_waiter);
a52bcc91
KG
710}
711
3f3f0e36
UB
712static void smc_lgr_free(struct smc_link_group *lgr);
713
0cfdd8f9
UB
714static void smc_lgr_free_work(struct work_struct *work)
715{
716 struct smc_link_group *lgr = container_of(to_delayed_work(work),
717 struct smc_link_group,
718 free_work);
a0a62ee1 719 spinlock_t *lgr_lock;
0cfdd8f9
UB
720 bool conns;
721
a0a62ee1
UB
722 smc_lgr_list_head(lgr, &lgr_lock);
723 spin_lock_bh(lgr_lock);
8e316b9e
UB
724 if (lgr->freeing) {
725 spin_unlock_bh(lgr_lock);
726 return;
727 }
0cfdd8f9
UB
728 read_lock_bh(&lgr->conns_lock);
729 conns = RB_EMPTY_ROOT(&lgr->conns_all);
730 read_unlock_bh(&lgr->conns_lock);
731 if (!conns) { /* number of lgr connections is no longer zero */
a0a62ee1 732 spin_unlock_bh(lgr_lock);
0cfdd8f9
UB
733 return;
734 }
8caa6544 735 list_del_init(&lgr->list); /* remove from smc_lgr_list */
8e316b9e
UB
736 lgr->freeing = 1; /* this instance does the freeing, no new schedule */
737 spin_unlock_bh(lgr_lock);
738 cancel_delayed_work(&lgr->free_work);
0d18a0cb 739
f3811fd7
KG
740 if (!lgr->is_smcd && !lgr->terminating)
741 smc_llc_send_link_delete_all(lgr, true,
742 SMC_LLC_DEL_PROG_INIT_TERM);
42bfba9e 743 if (lgr->is_smcd && !lgr->terminating)
8e316b9e 744 smc_ism_signal_shutdown(lgr);
a52bcc91
KG
745 if (!lgr->is_smcd)
746 smcr_lgr_link_deactivate_all(lgr);
8e316b9e 747 smc_lgr_free(lgr);
0cfdd8f9
UB
748}
749
f528ba24
UB
750static void smc_lgr_terminate_work(struct work_struct *work)
751{
752 struct smc_link_group *lgr = container_of(work, struct smc_link_group,
753 terminate_work);
754
5f78fe96 755 __smc_lgr_terminate(lgr, true);
f528ba24
UB
756}
757
026c381f
KG
758/* return next unique link id for the lgr */
759static u8 smcr_next_link_id(struct smc_link_group *lgr)
760{
761 u8 link_id;
762 int i;
763
764 while (1) {
cf4f5530 765again:
026c381f
KG
766 link_id = ++lgr->next_link_id;
767 if (!link_id) /* skip zero as link_id */
768 link_id = ++lgr->next_link_id;
769 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
d854fcbf 770 if (smc_link_usable(&lgr->lnk[i]) &&
026c381f 771 lgr->lnk[i].link_id == link_id)
cf4f5530 772 goto again;
026c381f
KG
773 }
774 break;
775 }
776 return link_id;
777}
778
6443b2f6
GG
779static void smcr_copy_dev_info_to_link(struct smc_link *link)
780{
781 struct smc_ib_device *smcibdev = link->smcibdev;
782
783 snprintf(link->ibname, sizeof(link->ibname), "%s",
784 smcibdev->ibdev->name);
785 link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1];
786}
787
336ba09f
KG
788int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
789 u8 link_idx, struct smc_init_info *ini)
f3c1dedd 790{
e49300a6 791 struct smc_ib_device *smcibdev;
f3c1dedd
KG
792 u8 rndvec[3];
793 int rc;
794
e49300a6
KG
795 if (lgr->smc_version == SMC_V2) {
796 lnk->smcibdev = ini->smcrv2.ib_dev_v2;
797 lnk->ibport = ini->smcrv2.ib_port_v2;
27ef6a99
GW
798 lnk->wr_rx_sge_cnt = lnk->smcibdev->ibdev->attrs.max_recv_sge < 2 ? 1 : 2;
799 lnk->wr_rx_buflen = smc_link_shared_v2_rxbuf(lnk) ?
800 SMC_WR_BUF_SIZE : SMC_WR_BUF_V2_SIZE;
e49300a6
KG
801 } else {
802 lnk->smcibdev = ini->ib_dev;
803 lnk->ibport = ini->ib_port;
27ef6a99
GW
804 lnk->wr_rx_sge_cnt = 1;
805 lnk->wr_rx_buflen = SMC_WR_BUF_SIZE;
e49300a6
KG
806 }
807 get_device(&lnk->smcibdev->ibdev->dev);
808 atomic_inc(&lnk->smcibdev->lnk_cnt);
20c9398d
WG
809 refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
810 lnk->clearing = 0;
e49300a6 811 lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
026c381f 812 lnk->link_id = smcr_next_link_id(lgr);
387707fd 813 lnk->lgr = lgr;
61f434b0 814 smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
026c381f 815 lnk->link_idx = link_idx;
e9b1a4f8 816 lnk->wr_rx_id_compl = 0;
ddc99286 817 smc_ibdev_cnt_inc(lnk);
6443b2f6 818 smcr_copy_dev_info_to_link(lnk);
07d51580 819 atomic_set(&lnk->conn_cnt, 0);
45fa8da0 820 smc_llc_link_set_uid(lnk);
541afa10 821 INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
e49300a6
KG
822 if (!lnk->smcibdev->initialized) {
823 rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
f3c1dedd
KG
824 if (rc)
825 goto out;
826 }
827 get_random_bytes(rndvec, sizeof(rndvec));
828 lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
829 (rndvec[2] << 16);
830 rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
24fb6811
KG
831 ini->vlan_id, lnk->gid, &lnk->sgid_index,
832 lgr->smc_version == SMC_V2 ?
833 &ini->smcrv2 : NULL);
f3c1dedd
KG
834 if (rc)
835 goto out;
836 rc = smc_llc_link_init(lnk);
837 if (rc)
838 goto out;
839 rc = smc_wr_alloc_link_mem(lnk);
840 if (rc)
841 goto clear_llc_lnk;
842 rc = smc_ib_create_protection_domain(lnk);
843 if (rc)
844 goto free_link_mem;
845 rc = smc_ib_create_queue_pair(lnk);
846 if (rc)
847 goto dealloc_pd;
848 rc = smc_wr_create_link(lnk);
849 if (rc)
850 goto destroy_qp;
741a49a4 851 lnk->state = SMC_LNK_ACTIVATING;
f3c1dedd
KG
852 return 0;
853
854destroy_qp:
855 smc_ib_destroy_queue_pair(lnk);
856dealloc_pd:
857 smc_ib_dealloc_protection_domain(lnk);
858free_link_mem:
859 smc_wr_free_link_mem(lnk);
860clear_llc_lnk:
0a99be43 861 smc_llc_link_clear(lnk, false);
f3c1dedd 862out:
ddc99286 863 smc_ibdev_cnt_dec(lnk);
e49300a6
KG
864 put_device(&lnk->smcibdev->ibdev->dev);
865 smcibdev = lnk->smcibdev;
f3c1dedd 866 memset(lnk, 0, sizeof(struct smc_link));
d854fcbf 867 lnk->state = SMC_LNK_UNUSED;
e49300a6
KG
868 if (!atomic_dec_return(&smcibdev->lnk_cnt))
869 wake_up(&smcibdev->lnks_deleted);
61f434b0 870 smc_lgr_put(lgr); /* lgr_hold above */
f3c1dedd
KG
871 return rc;
872}
873
0cfdd8f9 874/* create a new SMC link group */
bc36d2fc 875static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
0cfdd8f9
UB
876{
877 struct smc_link_group *lgr;
a2351c5d 878 struct list_head *lgr_list;
8c81ba20 879 struct smcd_dev *smcd;
0cfdd8f9 880 struct smc_link *lnk;
a0a62ee1 881 spinlock_t *lgr_lock;
026c381f 882 u8 link_idx;
0cfdd8f9 883 int rc = 0;
cd6851f3 884 int i;
0cfdd8f9 885
bc36d2fc 886 if (ini->is_smcd && ini->vlan_id) {
5c21c4cc
UB
887 if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
888 ini->vlan_id)) {
7a62725a 889 rc = SMC_CLC_DECL_ISMVLANERR;
c6ba7c9b 890 goto out;
7a62725a 891 }
c6ba7c9b
HW
892 }
893
0cfdd8f9
UB
894 lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
895 if (!lgr) {
7a62725a 896 rc = SMC_CLC_DECL_MEM;
29ee2701 897 goto ism_put_vlan;
0cfdd8f9 898 }
22ef473d
KG
899 lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
900 SMC_LGR_ID_SIZE, &lgr->id);
901 if (!lgr->tx_wq) {
902 rc = -ENOMEM;
903 goto free_lgr;
904 }
bc36d2fc 905 lgr->is_smcd = ini->is_smcd;
517c300e 906 lgr->sync_err = 0;
8e316b9e 907 lgr->terminating = 0;
8e316b9e 908 lgr->freeing = 0;
bc36d2fc 909 lgr->vlan_id = ini->vlan_id;
61f434b0 910 refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
aff7bfed
W
911 init_rwsem(&lgr->sndbufs_lock);
912 init_rwsem(&lgr->rmbs_lock);
c6ba7c9b 913 rwlock_init(&lgr->conns_lock);
cd6851f3
UB
914 for (i = 0; i < SMC_RMBE_SIZES; i++) {
915 INIT_LIST_HEAD(&lgr->sndbufs[i]);
916 INIT_LIST_HEAD(&lgr->rmbs[i]);
917 }
026c381f 918 lgr->next_link_id = 0;
9fda3510
HW
919 smc_lgr_list.num += SMC_LGR_NUM_INCR;
920 memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
0cfdd8f9 921 INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
f528ba24 922 INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
0cfdd8f9 923 lgr->conns_all = RB_ROOT;
bc36d2fc 924 if (ini->is_smcd) {
c6ba7c9b 925 /* SMC-D specific settings */
8c81ba20
SR
926 smcd = ini->ism_dev[ini->ism_selected];
927 get_device(smcd->ops->get_dev(smcd));
b40584d1
WG
928 lgr->peer_gid.gid =
929 ini->ism_peer_gid[ini->ism_selected].gid;
930 lgr->peer_gid.gid_ext =
931 ini->ism_peer_gid[ini->ism_selected].gid_ext;
5c21c4cc
UB
932 lgr->smcd = ini->ism_dev[ini->ism_selected];
933 lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
a0a62ee1 934 lgr_lock = &lgr->smcd->lgr_lock;
b81a5eb7 935 lgr->smc_version = ini->smcd_version;
50c6b20e 936 lgr->peer_shutdown = 0;
5c21c4cc 937 atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
c6ba7c9b
HW
938 } else {
939 /* SMC-R specific settings */
e49300a6
KG
940 struct smc_ib_device *ibdev;
941 int ibport;
942
c6ba7c9b 943 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
e49300a6
KG
944 lgr->smc_version = ini->smcr_version;
945 memcpy(lgr->peer_systemid, ini->peer_systemid,
bc36d2fc 946 SMC_SYSTEMID_LEN);
e49300a6
KG
947 if (lgr->smc_version == SMC_V2) {
948 ibdev = ini->smcrv2.ib_dev_v2;
949 ibport = ini->smcrv2.ib_port_v2;
950 lgr->saddr = ini->smcrv2.saddr;
951 lgr->uses_gateway = ini->smcrv2.uses_gateway;
952 memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
953 ETH_ALEN);
7f0620b9 954 lgr->max_conns = ini->max_conns;
69b888e3 955 lgr->max_links = ini->max_links;
e49300a6
KG
956 } else {
957 ibdev = ini->ib_dev;
958 ibport = ini->ib_port;
7f0620b9 959 lgr->max_conns = SMC_CONN_PER_LGR_MAX;
69b888e3 960 lgr->max_links = SMC_LINKS_ADD_LNK_MAX;
e49300a6
KG
961 }
962 memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
35dcf7ec 963 SMC_MAX_PNETID_LEN);
bdee15e8
DC
964 rc = smc_wr_alloc_lgr_mem(lgr);
965 if (rc)
8799e310 966 goto free_wq;
00a049cf
KG
967 smc_llc_lgr_init(lgr, smc);
968
026c381f
KG
969 link_idx = SMC_SINGLE_LINK;
970 lnk = &lgr->lnk[link_idx];
971 rc = smcr_link_init(lgr, lnk, link_idx, ini);
8799e310
KG
972 if (rc) {
973 smc_wr_free_lgr_mem(lgr);
22ef473d 974 goto free_wq;
8799e310 975 }
0237a3a6 976 lgr->net = smc_ib_net(lnk->smcibdev);
f3c1dedd
KG
977 lgr_list = &smc_lgr_list.list;
978 lgr_lock = &smc_lgr_list.lock;
b984f370 979 lgr->buf_type = lgr->net->smc.sysctl_smcr_buf_type;
6dabd405 980 atomic_inc(&lgr_cnt);
c6ba7c9b 981 }
0cfdd8f9 982 smc->conn.lgr = lgr;
a0a62ee1 983 spin_lock_bh(lgr_lock);
a9e44502 984 list_add_tail(&lgr->list, lgr_list);
a0a62ee1 985 spin_unlock_bh(lgr_lock);
f38ba179
UB
986 return 0;
987
22ef473d
KG
988free_wq:
989 destroy_workqueue(lgr->tx_wq);
f38ba179
UB
990free_lgr:
991 kfree(lgr);
29ee2701
UB
992ism_put_vlan:
993 if (ini->is_smcd && ini->vlan_id)
5c21c4cc 994 smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
0cfdd8f9 995out:
7a62725a
KG
996 if (rc < 0) {
997 if (rc == -ENOMEM)
998 rc = SMC_CLC_DECL_MEM;
999 else
1000 rc = SMC_CLC_DECL_INTERR;
1001 }
0cfdd8f9
UB
1002 return rc;
1003}
1004
c6f02ebe
KG
1005static int smc_write_space(struct smc_connection *conn)
1006{
1007 int buffer_len = conn->peer_rmbe_size;
1008 union smc_host_cursor prod;
1009 union smc_host_cursor cons;
1010 int space;
1011
1012 smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
1013 smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
1014 /* determine rx_buf space */
1015 space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
1016 return space;
1017}
1018
b8ded9de
KG
1019static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
1020 struct smc_wr_buf *wr_buf)
c6f02ebe
KG
1021{
1022 struct smc_connection *conn = &smc->conn;
1023 union smc_host_cursor cons, fin;
1024 int rc = 0;
1025 int diff;
1026
1027 smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
1028 smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
1029 /* set prod cursor to old state, enforce tx_rdma_writes() */
1030 smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
1031 smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
1032
1033 if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
1034 /* cons cursor advanced more than fin, and prod was set
1035 * fin above, so now prod is smaller than cons. Fix that.
1036 */
1037 diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
1038 smc_curs_add(conn->sndbuf_desc->len,
1039 &conn->tx_curs_sent, diff);
1040 smc_curs_add(conn->sndbuf_desc->len,
1041 &conn->tx_curs_fin, diff);
1042
1043 smp_mb__before_atomic();
1044 atomic_add(diff, &conn->sndbuf_space);
1045 smp_mb__after_atomic();
1046
1047 smc_curs_add(conn->peer_rmbe_size,
1048 &conn->local_tx_ctrl.prod, diff);
1049 smc_curs_add(conn->peer_rmbe_size,
1050 &conn->local_tx_ctrl_fin, diff);
1051 }
1052 /* recalculate, value is used by tx_rdma_writes() */
1053 atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
1054
1055 if (smc->sk.sk_state != SMC_INIT &&
1056 smc->sk.sk_state != SMC_CLOSED) {
b8ded9de 1057 rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
c6f02ebe 1058 if (!rc) {
22ef473d 1059 queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
c6f02ebe
KG
1060 smc->sk.sk_data_ready(&smc->sk);
1061 }
b8ded9de
KG
1062 } else {
1063 smc_wr_tx_put_slot(conn->lnk,
1064 (struct smc_wr_tx_pend_priv *)pend);
c6f02ebe
KG
1065 }
1066 return rc;
1067}
1068
64513d26
GG
1069void smc_switch_link_and_count(struct smc_connection *conn,
1070 struct smc_link *to_lnk)
07d51580
GG
1071{
1072 atomic_dec(&conn->lnk->conn_cnt);
20c9398d
WG
1073 /* link_hold in smc_conn_create() */
1074 smcr_link_put(conn->lnk);
07d51580
GG
1075 conn->lnk = to_lnk;
1076 atomic_inc(&conn->lnk->conn_cnt);
20c9398d
WG
1077 /* link_put in smc_conn_free() */
1078 smcr_link_hold(conn->lnk);
07d51580
GG
1079}
1080
c6f02ebe
KG
1081struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
1082 struct smc_link *from_lnk, bool is_dev_err)
1083{
1084 struct smc_link *to_lnk = NULL;
b8ded9de 1085 struct smc_cdc_tx_pend *pend;
c6f02ebe 1086 struct smc_connection *conn;
b8ded9de 1087 struct smc_wr_buf *wr_buf;
c6f02ebe
KG
1088 struct smc_sock *smc;
1089 struct rb_node *node;
1090 int i, rc = 0;
1091
1092 /* link is inactive, wake up tx waiters */
1093 smc_wr_wakeup_tx_wait(from_lnk);
1094
1095 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
741a49a4 1096 if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
c6f02ebe
KG
1097 continue;
1098 if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
1099 from_lnk->ibport == lgr->lnk[i].ibport) {
1100 continue;
1101 }
1102 to_lnk = &lgr->lnk[i];
1103 break;
1104 }
95f7f3e7 1105 if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
c6f02ebe
KG
1106 smc_lgr_terminate_sched(lgr);
1107 return NULL;
1108 }
1109again:
1110 read_lock_bh(&lgr->conns_lock);
1111 for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
1112 conn = rb_entry(node, struct smc_connection, alert_node);
1113 if (conn->lnk != from_lnk)
1114 continue;
1115 smc = container_of(conn, struct smc_sock, conn);
1116 /* conn->lnk not yet set in SMC_INIT state */
1117 if (smc->sk.sk_state == SMC_INIT)
1118 continue;
1119 if (smc->sk.sk_state == SMC_CLOSED ||
1120 smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
1121 smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
1122 smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
1123 smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
1124 smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
1125 smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
1126 smc->sk.sk_state == SMC_PEERABORTWAIT ||
1127 smc->sk.sk_state == SMC_PROCESSABORT) {
1128 spin_lock_bh(&conn->send_lock);
07d51580 1129 smc_switch_link_and_count(conn, to_lnk);
c6f02ebe
KG
1130 spin_unlock_bh(&conn->send_lock);
1131 continue;
1132 }
1133 sock_hold(&smc->sk);
1134 read_unlock_bh(&lgr->conns_lock);
b8ded9de
KG
1135 /* pre-fetch buffer outside of send_lock, might sleep */
1136 rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
95f7f3e7
KG
1137 if (rc)
1138 goto err_out;
c6f02ebe
KG
1139 /* avoid race with smcr_tx_sndbuf_nonempty() */
1140 spin_lock_bh(&conn->send_lock);
07d51580 1141 smc_switch_link_and_count(conn, to_lnk);
b8ded9de 1142 rc = smc_switch_cursor(smc, pend, wr_buf);
c6f02ebe
KG
1143 spin_unlock_bh(&conn->send_lock);
1144 sock_put(&smc->sk);
95f7f3e7
KG
1145 if (rc)
1146 goto err_out;
c6f02ebe
KG
1147 goto again;
1148 }
1149 read_unlock_bh(&lgr->conns_lock);
95f7f3e7 1150 smc_wr_tx_link_put(to_lnk);
c6f02ebe 1151 return to_lnk;
95f7f3e7
KG
1152
1153err_out:
1154 smcr_link_down_cond_sched(to_lnk);
1155 smc_wr_tx_link_put(to_lnk);
1156 return NULL;
c6f02ebe
KG
1157}
1158
b8d19945 1159static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
6d74c3a8 1160 struct smc_link_group *lgr)
b9247544 1161{
aff7bfed 1162 struct rw_semaphore *lock; /* lock buffer list */
d5500667
KG
1163 int rc;
1164
b8d19945 1165 if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
b9247544 1166 /* unregister rmb with peer */
d5500667
KG
1167 rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
1168 if (!rc) {
1169 /* protect against smc_llc_cli_rkey_exchange() */
f6421014 1170 down_read(&lgr->llc_conf_mutex);
b8d19945
WG
1171 smc_llc_do_delete_rkey(lgr, buf_desc);
1172 buf_desc->is_conf_rkey = false;
f6421014 1173 up_read(&lgr->llc_conf_mutex);
d5500667
KG
1174 smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
1175 }
b9247544 1176 }
d5500667 1177
b8d19945 1178 if (buf_desc->is_reg_err) {
b9247544 1179 /* buf registration failed, reuse not possible */
b8d19945
WG
1180 lock = is_rmb ? &lgr->rmbs_lock :
1181 &lgr->sndbufs_lock;
aff7bfed 1182 down_write(lock);
d386d59b 1183 smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
aff7bfed 1184 up_write(lock);
b9247544 1185
b8d19945 1186 smc_buf_free(lgr, is_rmb, buf_desc);
b9247544 1187 } else {
475f9ff6
W
1188 /* memzero_explicit provides potential memory barrier semantics */
1189 memzero_explicit(buf_desc->cpu_addr, buf_desc->len);
1190 WRITE_ONCE(buf_desc->used, 0);
b9247544
KG
1191 }
1192}
1193
ae2be35c
WG
1194static void smcd_buf_detach(struct smc_connection *conn)
1195{
1196 struct smcd_dev *smcd = conn->lgr->smcd;
1197 u64 peer_token = conn->peer_token;
1198
1199 if (!conn->sndbuf_desc)
1200 return;
1201
1202 smc_ism_detach_dmb(smcd, peer_token);
1203
1204 kfree(conn->sndbuf_desc);
1205 conn->sndbuf_desc = NULL;
1206}
1207
fb692ec4
KG
1208static void smc_buf_unuse(struct smc_connection *conn,
1209 struct smc_link_group *lgr)
cd6851f3 1210{
e0d10354
WG
1211 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
1212 bool is_smcd = lgr->is_smcd;
1213 int bufsize;
1214
1c552696 1215 if (conn->sndbuf_desc) {
e0d10354
WG
1216 bufsize = conn->sndbuf_desc->len;
1217 if (!is_smcd && conn->sndbuf_desc->is_vm) {
b8d19945
WG
1218 smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
1219 } else {
e0d10354 1220 memzero_explicit(conn->sndbuf_desc->cpu_addr, bufsize);
475f9ff6 1221 WRITE_ONCE(conn->sndbuf_desc->used, 0);
b8d19945 1222 }
e0d10354 1223 SMC_STAT_RMB_SIZE(smc, is_smcd, false, false, bufsize);
1c552696 1224 }
b8d19945 1225 if (conn->rmb_desc) {
e0d10354
WG
1226 bufsize = conn->rmb_desc->len;
1227 if (!is_smcd) {
b8d19945
WG
1228 smcr_buf_unuse(conn->rmb_desc, true, lgr);
1229 } else {
e0d10354
WG
1230 bufsize += sizeof(struct smcd_cdc_msg);
1231 memzero_explicit(conn->rmb_desc->cpu_addr, bufsize);
475f9ff6 1232 WRITE_ONCE(conn->rmb_desc->used, 0);
b8d19945 1233 }
e0d10354 1234 SMC_STAT_RMB_SIZE(smc, is_smcd, true, false, bufsize);
1c552696 1235 }
cd6851f3
UB
1236}
1237
0cfdd8f9
UB
1238/* remove a finished connection from its link group */
1239void smc_conn_free(struct smc_connection *conn)
1240{
fb692ec4
KG
1241 struct smc_link_group *lgr = conn->lgr;
1242
61f434b0
WG
1243 if (!lgr || conn->freed)
1244 /* Connection has never been registered in a
1245 * link group, or has already been freed.
1246 */
0cfdd8f9 1247 return;
61f434b0
WG
1248
1249 conn->freed = 1;
ea89c6c0 1250 if (!smc_conn_lgr_valid(conn))
61f434b0
WG
1251 /* Connection has already unregistered from
1252 * link group.
1253 */
1254 goto lgr_put;
1255
fb692ec4 1256 if (lgr->is_smcd) {
42bfba9e
UB
1257 if (!list_empty(&lgr->list))
1258 smc_ism_unset_conn(conn);
ae2be35c
WG
1259 if (smc_ism_support_dmb_nocopy(lgr->smcd))
1260 smcd_buf_detach(conn);
be244f28
HW
1261 tasklet_kill(&conn->rx_tsklet);
1262 } else {
349d4312 1263 smc_cdc_wait_pend_tx_wr(conn);
b286a065
KG
1264 if (current_work() != &conn->abort_work)
1265 cancel_work_sync(&conn->abort_work);
be244f28 1266 }
2a0674ff 1267 if (!list_empty(&lgr->list)) {
2a0674ff 1268 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
0537f0a2 1269 smc_lgr_unregister_conn(conn);
2a0674ff 1270 }
fb692ec4
KG
1271
1272 if (!lgr->conns_num)
1273 smc_lgr_schedule_free_work(lgr);
61f434b0 1274lgr_put:
20c9398d
WG
1275 if (!lgr->is_smcd)
1276 smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
61f434b0 1277 smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
0cfdd8f9
UB
1278}
1279
4a3641c1
KG
1280/* unregister a link from a buf_desc */
1281static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1282 struct smc_link *lnk)
1283{
b8d19945 1284 if (is_rmb || buf_desc->is_vm)
4a3641c1
KG
1285 buf_desc->is_reg_mr[lnk->link_idx] = false;
1286 if (!buf_desc->is_map_ib[lnk->link_idx])
1287 return;
b8d19945
WG
1288
1289 if ((is_rmb || buf_desc->is_vm) &&
1290 buf_desc->mr[lnk->link_idx]) {
1291 smc_ib_put_memory_region(buf_desc->mr[lnk->link_idx]);
1292 buf_desc->mr[lnk->link_idx] = NULL;
1293 }
1294 if (is_rmb)
4a3641c1 1295 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
b8d19945 1296 else
4a3641c1 1297 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
b8d19945 1298
4a3641c1
KG
1299 sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1300 buf_desc->is_map_ib[lnk->link_idx] = false;
1301}
1302
1303/* unmap all buffers of lgr for a deleted link */
1304static void smcr_buf_unmap_lgr(struct smc_link *lnk)
1305{
1306 struct smc_link_group *lgr = lnk->lgr;
1307 struct smc_buf_desc *buf_desc, *bf;
1308 int i;
1309
1310 for (i = 0; i < SMC_RMBE_SIZES; i++) {
aff7bfed 1311 down_write(&lgr->rmbs_lock);
4a3641c1
KG
1312 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
1313 smcr_buf_unmap_link(buf_desc, true, lnk);
aff7bfed
W
1314 up_write(&lgr->rmbs_lock);
1315
1316 down_write(&lgr->sndbufs_lock);
4a3641c1
KG
1317 list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
1318 list)
1319 smcr_buf_unmap_link(buf_desc, false, lnk);
aff7bfed 1320 up_write(&lgr->sndbufs_lock);
4a3641c1
KG
1321 }
1322}
1323
1324static void smcr_rtoken_clear_link(struct smc_link *lnk)
1325{
1326 struct smc_link_group *lgr = lnk->lgr;
1327 int i;
1328
1329 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1330 lgr->rtokens[i][lnk->link_idx].rkey = 0;
1331 lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
1332 }
1333}
1334
20c9398d 1335static void __smcr_link_clear(struct smc_link *lnk)
0cfdd8f9 1336{
61f434b0 1337 struct smc_link_group *lgr = lnk->lgr;
d854fcbf
KG
1338 struct smc_ib_device *smcibdev;
1339
f38ba179 1340 smc_wr_free_link_mem(lnk);
ddc99286 1341 smc_ibdev_cnt_dec(lnk);
f3c1dedd 1342 put_device(&lnk->smcibdev->ibdev->dev);
d854fcbf
KG
1343 smcibdev = lnk->smcibdev;
1344 memset(lnk, 0, sizeof(struct smc_link));
1345 lnk->state = SMC_LNK_UNUSED;
1346 if (!atomic_dec_return(&smcibdev->lnk_cnt))
1347 wake_up(&smcibdev->lnks_deleted);
61f434b0 1348 smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
0cfdd8f9
UB
1349}
1350
20c9398d
WG
1351/* must be called under lgr->llc_conf_mutex lock */
1352void smcr_link_clear(struct smc_link *lnk, bool log)
1353{
1354 if (!lnk->lgr || lnk->clearing ||
1355 lnk->state == SMC_LNK_UNUSED)
1356 return;
1357 lnk->clearing = 1;
1358 lnk->peer_qpn = 0;
1359 smc_llc_link_clear(lnk, log);
1360 smcr_buf_unmap_lgr(lnk);
1361 smcr_rtoken_clear_link(lnk);
1362 smc_ib_modify_qp_error(lnk);
1363 smc_wr_free_link(lnk);
1364 smc_ib_destroy_queue_pair(lnk);
1365 smc_ib_dealloc_protection_domain(lnk);
1366 smcr_link_put(lnk); /* theoretically last link_put */
1367}
1368
1369void smcr_link_hold(struct smc_link *lnk)
1370{
1371 refcount_inc(&lnk->refcnt);
1372}
1373
1374void smcr_link_put(struct smc_link *lnk)
1375{
1376 if (refcount_dec_and_test(&lnk->refcnt))
1377 __smcr_link_clear(lnk);
1378}
1379
c6ba7c9b
HW
1380static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
1381 struct smc_buf_desc *buf_desc)
cd6851f3 1382{
b9247544 1383 int i;
6511aad3 1384
4a3641c1
KG
1385 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
1386 smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
387707fd 1387
b8d19945 1388 if (!buf_desc->is_vm && buf_desc->pages)
2ef4f27a 1389 __free_pages(buf_desc->pages, buf_desc->order);
b8d19945
WG
1390 else if (buf_desc->is_vm && buf_desc->cpu_addr)
1391 vfree(buf_desc->cpu_addr);
3e034725 1392 kfree(buf_desc);
cd6851f3
UB
1393}
1394
c6ba7c9b
HW
1395static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
1396 struct smc_buf_desc *buf_desc)
1397{
be244f28
HW
1398 if (is_dmb) {
1399 /* restore original buf len */
1400 buf_desc->len += sizeof(struct smcd_cdc_msg);
c6ba7c9b 1401 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
be244f28 1402 } else {
c6ba7c9b 1403 kfree(buf_desc->cpu_addr);
be244f28 1404 }
c6ba7c9b
HW
1405 kfree(buf_desc);
1406}
1407
1408static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
1409 struct smc_buf_desc *buf_desc)
1410{
1411 if (lgr->is_smcd)
1412 smcd_buf_free(lgr, is_rmb, buf_desc);
1413 else
1414 smcr_buf_free(lgr, is_rmb, buf_desc);
1415}
1416
3e034725 1417static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
cd6851f3 1418{
3e034725
UB
1419 struct smc_buf_desc *buf_desc, *bf_desc;
1420 struct list_head *buf_list;
cd6851f3
UB
1421 int i;
1422
1423 for (i = 0; i < SMC_RMBE_SIZES; i++) {
3e034725
UB
1424 if (is_rmb)
1425 buf_list = &lgr->rmbs[i];
1426 else
1427 buf_list = &lgr->sndbufs[i];
1428 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
cd6851f3 1429 list) {
d386d59b 1430 smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
6511aad3 1431 smc_buf_free(lgr, is_rmb, buf_desc);
cd6851f3
UB
1432 }
1433 }
1434}
1435
3e034725
UB
1436static void smc_lgr_free_bufs(struct smc_link_group *lgr)
1437{
1438 /* free send buffers */
1439 __smc_lgr_free_bufs(lgr, false);
1440 /* free rmbs */
1441 __smc_lgr_free_bufs(lgr, true);
1442}
1443
61f434b0
WG
1444/* won't be freed until no one accesses to lgr anymore */
1445static void __smc_lgr_free(struct smc_link_group *lgr)
1446{
1447 smc_lgr_free_bufs(lgr);
1448 if (lgr->is_smcd) {
1449 if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
1450 wake_up(&lgr->smcd->lgrs_deleted);
1451 } else {
1452 smc_wr_free_lgr_mem(lgr);
1453 if (!atomic_dec_return(&lgr_cnt))
1454 wake_up(&lgrs_deleted);
1455 }
1456 kfree(lgr);
1457}
1458
0cfdd8f9 1459/* remove a link group */
3f3f0e36 1460static void smc_lgr_free(struct smc_link_group *lgr)
0cfdd8f9 1461{
b9247544
KG
1462 int i;
1463
a52bcc91 1464 if (!lgr->is_smcd) {
b5dd4d69 1465 down_write(&lgr->llc_conf_mutex);
a52bcc91
KG
1466 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1467 if (lgr->lnk[i].state != SMC_LNK_UNUSED)
0a99be43 1468 smcr_link_clear(&lgr->lnk[i], false);
a52bcc91 1469 }
b5dd4d69 1470 up_write(&lgr->llc_conf_mutex);
a52bcc91
KG
1471 smc_llc_lgr_clear(lgr);
1472 }
1473
22ef473d 1474 destroy_workqueue(lgr->tx_wq);
b3cb53c0 1475 if (lgr->is_smcd) {
f9aab6f2 1476 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
8c81ba20 1477 put_device(lgr->smcd->ops->get_dev(lgr->smcd));
b3cb53c0 1478 }
61f434b0
WG
1479 smc_lgr_put(lgr); /* theoretically last lgr_put */
1480}
1481
1482void smc_lgr_hold(struct smc_link_group *lgr)
1483{
1484 refcount_inc(&lgr->refcnt);
1485}
1486
1487void smc_lgr_put(struct smc_link_group *lgr)
1488{
1489 if (refcount_dec_and_test(&lgr->refcnt))
1490 __smc_lgr_free(lgr);
0cfdd8f9
UB
1491}
1492
2a0674ff
UB
1493static void smc_sk_wake_ups(struct smc_sock *smc)
1494{
1495 smc->sk.sk_write_space(&smc->sk);
1496 smc->sk.sk_data_ready(&smc->sk);
1497 smc->sk.sk_state_change(&smc->sk);
1498}
1499
1500/* kill a connection */
5421ec28 1501static void smc_conn_kill(struct smc_connection *conn, bool soft)
2a0674ff
UB
1502{
1503 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
1504
50c6b20e
UB
1505 if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
1506 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
1507 else
1508 smc_close_abort(conn);
2a0674ff 1509 conn->killed = 1;
50c6b20e 1510 smc->sk.sk_err = ECONNABORTED;
2a0674ff 1511 smc_sk_wake_ups(smc);
42bfba9e
UB
1512 if (conn->lgr->is_smcd) {
1513 smc_ism_unset_conn(conn);
ae2be35c
WG
1514 if (smc_ism_support_dmb_nocopy(conn->lgr->smcd))
1515 smcd_buf_detach(conn);
5421ec28
UB
1516 if (soft)
1517 tasklet_kill(&conn->rx_tsklet);
1518 else
1519 tasklet_unlock_wait(&conn->rx_tsklet);
6a37ad3d 1520 } else {
349d4312 1521 smc_cdc_wait_pend_tx_wr(conn);
42bfba9e 1522 }
2a0674ff 1523 smc_lgr_unregister_conn(conn);
81cf4f47 1524 smc_close_active_abort(smc);
2a0674ff
UB
1525}
1526
42bfba9e
UB
1527static void smc_lgr_cleanup(struct smc_link_group *lgr)
1528{
1529 if (lgr->is_smcd) {
1530 smc_ism_signal_shutdown(lgr);
42bfba9e 1531 } else {
3e0c40af
KG
1532 u32 rsn = lgr->llc_termination_rsn;
1533
1534 if (!rsn)
1535 rsn = SMC_LLC_DEL_PROG_INIT_TERM;
1536 smc_llc_send_link_delete_all(lgr, false, rsn);
a52bcc91 1537 smcr_lgr_link_deactivate_all(lgr);
42bfba9e
UB
1538 }
1539}
1540
ba952060
KG
1541/* terminate link group
1542 * @soft: true if link group shutdown can take its time
1543 * false if immediate link group shutdown is required
1544 */
5421ec28 1545static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
0cfdd8f9
UB
1546{
1547 struct smc_connection *conn;
b38d7324 1548 struct smc_sock *smc;
0cfdd8f9
UB
1549 struct rb_node *node;
1550
517c300e
KG
1551 if (lgr->terminating)
1552 return; /* lgr already terminating */
a52bcc91 1553 /* cancel free_work sync, will terminate when lgr->freeing is set */
13085e1b 1554 cancel_delayed_work(&lgr->free_work);
517c300e 1555 lgr->terminating = 1;
0cfdd8f9 1556
69318b52
UB
1557 /* kill remaining link group connections */
1558 read_lock_bh(&lgr->conns_lock);
0cfdd8f9
UB
1559 node = rb_first(&lgr->conns_all);
1560 while (node) {
69318b52 1561 read_unlock_bh(&lgr->conns_lock);
0cfdd8f9 1562 conn = rb_entry(node, struct smc_connection, alert_node);
b38d7324 1563 smc = container_of(conn, struct smc_sock, conn);
81cf4f47 1564 sock_hold(&smc->sk); /* sock_put below */
69318b52 1565 lock_sock(&smc->sk);
5421ec28 1566 smc_conn_kill(conn, soft);
69318b52 1567 release_sock(&smc->sk);
81cf4f47 1568 sock_put(&smc->sk); /* sock_hold above */
69318b52 1569 read_lock_bh(&lgr->conns_lock);
0cfdd8f9
UB
1570 node = rb_first(&lgr->conns_all);
1571 }
69318b52 1572 read_unlock_bh(&lgr->conns_lock);
42bfba9e 1573 smc_lgr_cleanup(lgr);
a52bcc91 1574 smc_lgr_free(lgr);
0cfdd8f9
UB
1575}
1576
5f78fe96
KG
1577/* unlink link group and schedule termination */
1578void smc_lgr_terminate_sched(struct smc_link_group *lgr)
b9f227c3 1579{
a0a62ee1
UB
1580 spinlock_t *lgr_lock;
1581
1582 smc_lgr_list_head(lgr, &lgr_lock);
1583 spin_lock_bh(lgr_lock);
3739707c 1584 if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
8caa6544
UB
1585 spin_unlock_bh(lgr_lock);
1586 return; /* lgr already terminating */
1587 }
1588 list_del_init(&lgr->list);
a52bcc91 1589 lgr->freeing = 1;
a0a62ee1 1590 spin_unlock_bh(lgr_lock);
5f78fe96 1591 schedule_work(&lgr->terminate_work);
b9f227c3
HW
1592}
1593
5421ec28 1594/* Called when peer lgr shutdown (regularly or abnormally) is received */
b40584d1
WG
1595void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
1596 unsigned short vlan)
c6ba7c9b
HW
1597{
1598 struct smc_link_group *lgr, *l;
1599 LIST_HEAD(lgr_free_list);
1600
1601 /* run common cleanup function and build free list */
a0a62ee1 1602 spin_lock_bh(&dev->lgr_lock);
a2351c5d 1603 list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
b40584d1
WG
1604 if ((!peer_gid->gid ||
1605 (lgr->peer_gid.gid == peer_gid->gid &&
b27696cd 1606 !smc_ism_is_emulated(dev) ? 1 :
b40584d1 1607 lgr->peer_gid.gid_ext == peer_gid->gid_ext)) &&
0512f69e 1608 (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
b40584d1 1609 if (peer_gid->gid) /* peer triggered termination */
50c6b20e 1610 lgr->peer_shutdown = 1;
c6ba7c9b 1611 list_move(&lgr->list, &lgr_free_list);
a52bcc91 1612 lgr->freeing = 1;
c6ba7c9b
HW
1613 }
1614 }
a0a62ee1 1615 spin_unlock_bh(&dev->lgr_lock);
c6ba7c9b
HW
1616
1617 /* cancel the regular free workers and actually free lgrs */
1618 list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
1619 list_del_init(&lgr->list);
50c6b20e 1620 schedule_work(&lgr->terminate_work);
c6ba7c9b
HW
1621 }
1622}
1623
5421ec28
UB
1624/* Called when an SMCD device is removed or the smc module is unloaded */
1625void smc_smcd_terminate_all(struct smcd_dev *smcd)
1626{
1627 struct smc_link_group *lgr, *lg;
1628 LIST_HEAD(lgr_free_list);
1629
1630 spin_lock_bh(&smcd->lgr_lock);
1631 list_splice_init(&smcd->lgr_list, &lgr_free_list);
1632 list_for_each_entry(lgr, &lgr_free_list, list)
1633 lgr->freeing = 1;
1634 spin_unlock_bh(&smcd->lgr_lock);
1635
1636 list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1637 list_del_init(&lgr->list);
1638 __smc_lgr_terminate(lgr, false);
1639 }
5edd6b9c
UB
1640
1641 if (atomic_read(&smcd->lgr_cnt))
1642 wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
5421ec28
UB
1643}
1644
0b29ec64
UB
1645/* Called when an SMCR device is removed or the smc module is unloaded.
1646 * If smcibdev is given, all SMCR link groups using this device are terminated.
1647 * If smcibdev is NULL, all SMCR link groups are terminated.
1648 */
1649void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
1650{
1651 struct smc_link_group *lgr, *lg;
1652 LIST_HEAD(lgr_free_list);
b9247544 1653 int i;
0b29ec64
UB
1654
1655 spin_lock_bh(&smc_lgr_list.lock);
1656 if (!smcibdev) {
1657 list_splice_init(&smc_lgr_list.list, &lgr_free_list);
1658 list_for_each_entry(lgr, &lgr_free_list, list)
1659 lgr->freeing = 1;
1660 } else {
1661 list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
b9247544 1662 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
87523930 1663 if (lgr->lnk[i].smcibdev == smcibdev)
56d99e81 1664 smcr_link_down_cond_sched(&lgr->lnk[i]);
0b29ec64
UB
1665 }
1666 }
1667 }
1668 spin_unlock_bh(&smc_lgr_list.lock);
1669
1670 list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1671 list_del_init(&lgr->list);
3e0c40af 1672 smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
0b29ec64 1673 __smc_lgr_terminate(lgr, false);
349d4312
DL
1674 }
1675
6dabd405
UB
1676 if (smcibdev) {
1677 if (atomic_read(&smcibdev->lnk_cnt))
1678 wait_event(smcibdev->lnks_deleted,
1679 !atomic_read(&smcibdev->lnk_cnt));
1680 } else {
1681 if (atomic_read(&lgr_cnt))
1682 wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
1683 }
0b29ec64
UB
1684}
1685
ad6c111b
KG
1686/* set new lgr type and clear all asymmetric link tagging */
1687void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
1688{
0a99be43 1689 char *lgr_type = "";
ad6c111b
KG
1690 int i;
1691
1692 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
1693 if (smc_link_usable(&lgr->lnk[i]))
1694 lgr->lnk[i].link_is_asym = false;
0a99be43
KG
1695 if (lgr->type == new_type)
1696 return;
ad6c111b 1697 lgr->type = new_type;
0a99be43
KG
1698
1699 switch (lgr->type) {
1700 case SMC_LGR_NONE:
1701 lgr_type = "NONE";
1702 break;
1703 case SMC_LGR_SINGLE:
1704 lgr_type = "SINGLE";
1705 break;
1706 case SMC_LGR_SYMMETRIC:
1707 lgr_type = "SYMMETRIC";
1708 break;
1709 case SMC_LGR_ASYMMETRIC_PEER:
1710 lgr_type = "ASYMMETRIC_PEER";
1711 break;
1712 case SMC_LGR_ASYMMETRIC_LOCAL:
1713 lgr_type = "ASYMMETRIC_LOCAL";
1714 break;
1715 }
de2fea7b 1716 pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: "
0a99be43 1717 "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
de2fea7b 1718 lgr->net->net_cookie, lgr_type, lgr->pnet_id);
ad6c111b
KG
1719}
1720
1721/* set new lgr type and tag a link as asymmetric */
1722void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
1723 enum smc_lgr_type new_type, int asym_lnk_idx)
1724{
1725 smcr_lgr_set_type(lgr, new_type);
1726 lgr->lnk[asym_lnk_idx].link_is_asym = true;
1727}
1728
b286a065
KG
1729/* abort connection, abort_work scheduled from tasklet context */
1730static void smc_conn_abort_work(struct work_struct *work)
1731{
1732 struct smc_connection *conn = container_of(work,
1733 struct smc_connection,
1734 abort_work);
1735 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
1736
a18cee47 1737 lock_sock(&smc->sk);
b286a065 1738 smc_conn_kill(conn, true);
a18cee47 1739 release_sock(&smc->sk);
b286a065
KG
1740 sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
1741}
1742
1f90a05d
KG
1743void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
1744{
1f90a05d
KG
1745 struct smc_link_group *lgr, *n;
1746
f5146e3e 1747 spin_lock_bh(&smc_lgr_list.lock);
1f90a05d 1748 list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
c48254fa
KG
1749 struct smc_link *link;
1750
1f90a05d
KG
1751 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1752 SMC_MAX_PNETID_LEN) ||
1753 lgr->type == SMC_LGR_SYMMETRIC ||
0237a3a6
TL
1754 lgr->type == SMC_LGR_ASYMMETRIC_PEER ||
1755 !rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
1f90a05d 1756 continue;
c48254fa 1757
69b888e3
GW
1758 if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
1759 continue;
1760
c48254fa
KG
1761 /* trigger local add link processing */
1762 link = smc_llc_usable_link(lgr);
1763 if (link)
1764 smc_llc_add_link_local(link);
1f90a05d 1765 }
f5146e3e 1766 spin_unlock_bh(&smc_lgr_list.lock);
1f90a05d
KG
1767}
1768
541afa10
KG
1769/* link is down - switch connections to alternate link,
1770 * must be called under lgr->llc_conf_mutex lock
1771 */
1772static void smcr_link_down(struct smc_link *lnk)
1773{
1774 struct smc_link_group *lgr = lnk->lgr;
1775 struct smc_link *to_lnk;
1776 int del_link_id;
1777
1778 if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
1779 return;
1780
c6f02ebe 1781 to_lnk = smc_switch_conns(lgr, lnk, true);
541afa10 1782 if (!to_lnk) { /* no backup link available */
0a99be43 1783 smcr_link_clear(lnk, true);
541afa10
KG
1784 return;
1785 }
ad6c111b 1786 smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
541afa10
KG
1787 del_link_id = lnk->link_id;
1788
1789 if (lgr->role == SMC_SERV) {
1790 /* trigger local delete link processing */
4dadd151 1791 smc_llc_srv_delete_link_local(to_lnk, del_link_id);
541afa10
KG
1792 } else {
1793 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1794 /* another llc task is ongoing */
b5dd4d69 1795 up_write(&lgr->llc_conf_mutex);
6778a6be
KG
1796 wait_event_timeout(lgr->llc_flow_waiter,
1797 (list_empty(&lgr->list) ||
1798 lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
541afa10 1799 SMC_LLC_WAIT_TIME);
b5dd4d69 1800 down_write(&lgr->llc_conf_mutex);
541afa10 1801 }
68fd8942 1802 if (!list_empty(&lgr->list)) {
6778a6be
KG
1803 smc_llc_send_delete_link(to_lnk, del_link_id,
1804 SMC_LLC_REQ, true,
1805 SMC_LLC_DEL_LOST_PATH);
68fd8942
KG
1806 smcr_link_clear(lnk, true);
1807 }
6778a6be 1808 wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */
541afa10
KG
1809 }
1810}
1811
1812/* must be called under lgr->llc_conf_mutex lock */
1813void smcr_link_down_cond(struct smc_link *lnk)
1814{
a3a0e81b
TL
1815 if (smc_link_downing(&lnk->state)) {
1816 trace_smcr_link_down(lnk, __builtin_return_address(0));
541afa10 1817 smcr_link_down(lnk);
a3a0e81b 1818 }
541afa10
KG
1819}
1820
1821/* will get the lgr->llc_conf_mutex lock */
1822void smcr_link_down_cond_sched(struct smc_link *lnk)
1823{
a3a0e81b
TL
1824 if (smc_link_downing(&lnk->state)) {
1825 trace_smcr_link_down(lnk, __builtin_return_address(0));
2b33eb8f
GW
1826 smcr_link_hold(lnk); /* smcr_link_put in link_down_wrk */
1827 if (!schedule_work(&lnk->link_down_wrk))
1828 smcr_link_put(lnk);
a3a0e81b 1829 }
541afa10
KG
1830}
1831
1832void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
1833{
1834 struct smc_link_group *lgr, *n;
1835 int i;
1836
1837 list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1838 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1839 SMC_MAX_PNETID_LEN))
1840 continue; /* lgr is not affected */
1841 if (list_empty(&lgr->list))
1842 continue;
1843 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1844 struct smc_link *lnk = &lgr->lnk[i];
1845
1846 if (smc_link_usable(lnk) &&
1847 lnk->smcibdev == smcibdev && lnk->ibport == ibport)
1848 smcr_link_down_cond_sched(lnk);
1849 }
1850 }
1851}
1852
541afa10
KG
1853static void smc_link_down_work(struct work_struct *work)
1854{
1855 struct smc_link *link = container_of(work, struct smc_link,
1856 link_down_wrk);
1857 struct smc_link_group *lgr = link->lgr;
1858
1859 if (list_empty(&lgr->list))
2b33eb8f 1860 goto out;
6778a6be 1861 wake_up_all(&lgr->llc_msg_waiter);
b5dd4d69 1862 down_write(&lgr->llc_conf_mutex);
541afa10 1863 smcr_link_down(link);
b5dd4d69 1864 up_write(&lgr->llc_conf_mutex);
2b33eb8f
GW
1865
1866out:
1867 smcr_link_put(link); /* smcr_link_hold by schedulers of link_down_work */
541afa10
KG
1868}
1869
587acad4
KG
1870static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
1871 struct netdev_nested_priv *priv)
1872{
1873 unsigned short *vlan_id = (unsigned short *)priv->data;
1874
1875 if (is_vlan_dev(lower_dev)) {
1876 *vlan_id = vlan_dev_vlan_id(lower_dev);
1877 return 1;
1878 }
1879
1880 return 0;
1881}
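/* Note: netdev_walk_all_lower_dev() stops iterating as soon as the
 * callback returns non-zero, so smc_vlan_by_tcpsk() below picks up
 * the first VLAN lower device found.
 */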
1882
1883/* Determine vlan of internal TCP socket. */
bc36d2fc 1884int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
0cfdd8f9
UB
1885{
1886 struct dst_entry *dst = sk_dst_get(clcsock->sk);
587acad4 1887 struct netdev_nested_priv priv;
cb9d43f6 1888 struct net_device *ndev;
587acad4 1889 int rc = 0;
0cfdd8f9 1890
bc36d2fc 1891 ini->vlan_id = 0;
0cfdd8f9
UB
1892 if (!dst) {
1893 rc = -ENOTCONN;
1894 goto out;
1895 }
1896 if (!dst->dev) {
1897 rc = -ENODEV;
1898 goto out_rel;
1899 }
1900
cb9d43f6
UB
1901 ndev = dst->dev;
1902 if (is_vlan_dev(ndev)) {
bc36d2fc 1903 ini->vlan_id = vlan_dev_vlan_id(ndev);
cb9d43f6
UB
1904 goto out_rel;
1905 }
1906
587acad4 1907 priv.data = (void *)&ini->vlan_id;
cb9d43f6 1908 rtnl_lock();
587acad4 1909 netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
cb9d43f6 1910 rtnl_unlock();
0cfdd8f9
UB
1911
1912out_rel:
1913 dst_release(dst);
1914out:
1915 return rc;
1916}
1917
e49300a6
KG
1918static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
1919 u8 peer_systemid[],
1920 u8 peer_gid[],
1921 u8 peer_mac_v1[],
0237a3a6
TL
1922 enum smc_lgr_role role, u32 clcqpn,
1923 struct net *net)
0cfdd8f9 1924{
0237a3a6 1925 struct smc_link *lnk;
b9247544
KG
1926 int i;
1927
e49300a6 1928 if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
b9247544
KG
1929 lgr->role != role)
1930 return false;
1931
1932 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
0237a3a6
TL
1933 lnk = &lgr->lnk[i];
1934
1935 if (!smc_link_active(lnk))
b9247544 1936 continue;
0237a3a6
TL
1937 /* use verbs API to check netns, instead of lgr->net */
1938 if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net))
1939 return false;
1940 if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) &&
1941 !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) &&
e49300a6 1942 (smcr_version == SMC_V2 ||
0237a3a6 1943 !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN)))
b9247544
KG
1944 return true;
1945 }
1946 return false;
c6ba7c9b 1947}
0cfdd8f9 1948
c6ba7c9b 1949static bool smcd_lgr_match(struct smc_link_group *lgr,
b40584d1
WG
1950 struct smcd_dev *smcismdev,
1951 struct smcd_gid *peer_gid)
c6ba7c9b 1952{
c3dfcdb6
WG
1953 if (lgr->peer_gid.gid != peer_gid->gid ||
1954 lgr->smcd != smcismdev)
1955 return false;
1956
b27696cd 1957 if (smc_ism_is_emulated(smcismdev) &&
c3dfcdb6
WG
1958 lgr->peer_gid.gid_ext != peer_gid->gid_ext)
1959 return false;
1960
1961 return true;
0cfdd8f9
UB
1962}
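/* Note: per the check above, Emulated-ISM peers are identified by the
 * full 128-bit GID (gid + gid_ext), so both halves must match; plain
 * ISM devices carry no gid_ext, hence only gid is compared.
 */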
1963
1964/* create a new SMC connection (and a new link group if necessary) */
bc36d2fc 1965int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
0cfdd8f9
UB
1966{
1967 struct smc_connection *conn = &smc->conn;
0237a3a6 1968 struct net *net = sock_net(&smc->sk);
a2351c5d 1969 struct list_head *lgr_list;
0cfdd8f9 1970 struct smc_link_group *lgr;
0cfdd8f9 1971 enum smc_lgr_role role;
a0a62ee1 1972 spinlock_t *lgr_lock;
0cfdd8f9
UB
1973 int rc = 0;
1974
5c21c4cc 1975 lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
3fc64937 1976 &smc_lgr_list.list;
5c21c4cc 1977 lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
3fc64937 1978 &smc_lgr_list.lock;
5ac54d87 1979 ini->first_contact_local = 1;
0cfdd8f9 1980 role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
5ac54d87 1981 if (role == SMC_CLNT && ini->first_contact_peer)
0cfdd8f9
UB
1982 /* create new link group as well */
1983 goto create;
1984
1985 /* determine if an existing link group can be reused */
a0a62ee1 1986 spin_lock_bh(lgr_lock);
a2351c5d 1987 list_for_each_entry(lgr, lgr_list, list) {
0cfdd8f9 1988 write_lock_bh(&lgr->conns_lock);
bc36d2fc 1989 if ((ini->is_smcd ?
5c21c4cc 1990 smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
b40584d1 1991 &ini->ism_peer_gid[ini->ism_selected]) :
e49300a6
KG
1992 smcr_lgr_match(lgr, ini->smcr_version,
1993 ini->peer_systemid,
1994 ini->peer_gid, ini->peer_mac, role,
0237a3a6 1995 ini->ib_clcqpn, net)) &&
0cfdd8f9 1996 !lgr->sync_err &&
0530bd6e
KG
1997 (ini->smcd_version == SMC_V2 ||
1998 lgr->vlan_id == ini->vlan_id) &&
a9e44502 1999 (role == SMC_CLNT || ini->is_smcd ||
7f0620b9 2000 (lgr->conns_num < lgr->max_conns &&
4940a1fd 2001 !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
0cfdd8f9 2002 /* link group found */
5ac54d87 2003 ini->first_contact_local = 0;
0cfdd8f9 2004 conn->lgr = lgr;
56bc3b20 2005 rc = smc_lgr_register_conn(conn, false);
0cfdd8f9 2006 write_unlock_bh(&lgr->conns_lock);
b9247544
KG
2007 if (!rc && delayed_work_pending(&lgr->free_work))
2008 cancel_delayed_work(&lgr->free_work);
0cfdd8f9
UB
2009 break;
2010 }
2011 write_unlock_bh(&lgr->conns_lock);
2012 }
a0a62ee1 2013 spin_unlock_bh(lgr_lock);
b9247544
KG
2014 if (rc)
2015 return rc;
0cfdd8f9 2016
5ac54d87
UB
2017 if (role == SMC_CLNT && !ini->first_contact_peer &&
2018 ini->first_contact_local) {
0cfdd8f9
UB
2019 /* Server reuses a link group, but Client wants to start
 2020		 * a new one;
 2021		 * send out_of_sync decline, reason: synchronization error
2022 */
7a62725a 2023 return SMC_CLC_DECL_SYNCERR;
0cfdd8f9
UB
2024 }
2025
2026create:
5ac54d87 2027 if (ini->first_contact_local) {
bc36d2fc 2028 rc = smc_lgr_create(smc, ini);
0cfdd8f9
UB
2029 if (rc)
2030 goto out;
44808792
HZ
2031 lgr = conn->lgr;
2032 write_lock_bh(&lgr->conns_lock);
56bc3b20 2033 rc = smc_lgr_register_conn(conn, true);
44808792 2034 write_unlock_bh(&lgr->conns_lock);
36595d8a
WG
2035 if (rc) {
2036 smc_lgr_cleanup_early(lgr);
b9247544 2037 goto out;
36595d8a 2038 }
0cfdd8f9 2039 }
61f434b0 2040 smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
20c9398d
WG
2041 if (!conn->lgr->is_smcd)
2042 smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
61f434b0 2043 conn->freed = 0;
5f08318f 2044 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
cbba07a7 2045 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
de8474eb 2046 conn->urg_state = SMC_URG_READ;
349d4312 2047 init_waitqueue_head(&conn->cdc_pend_tx_wq);
b286a065 2048 INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
bc36d2fc 2049 if (ini->is_smcd) {
be244f28
HW
2050 conn->rx_off = sizeof(struct smcd_cdc_msg);
2051 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
2d2bfeb8
UB
2052 } else {
2053 conn->rx_off = 0;
be244f28 2054 }
5f08318f
UB
2055#ifndef KERNEL_HAS_ATOMIC64
2056 spin_lock_init(&conn->acurs_lock);
2057#endif
0cfdd8f9
UB
2058
2059out:
7a62725a 2060 return rc;
0cfdd8f9 2061}
cd6851f3 2062
67161779 2063#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
2f4b101c 2064#define SMCR_RMBE_SIZES 15 /* 0 -> 16KB, 1 -> 32KB, .. 15 -> 512MB */
67161779
SR
2065
2066/* convert the RMB size into the compressed notation (minimum 16K, see
 2067 * SMCD/R_DMBE_SIZES).
2f6becaf
HW
2068 * In contrast to plain ilog2, this rounds towards the next power of 2,
2069 * so the socket application gets at least its desired sndbuf / rcvbuf size.
2070 */
67161779 2071static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
2f6becaf
HW
2072{
2073 u8 compressed;
2074
2075 if (size <= SMC_BUF_MIN_SIZE)
2076 return 0;
2077
67161779
SR
2078 size = (size - 1) >> 14; /* convert to 16K multiple */
2079 compressed = min_t(u8, ilog2(size) + 1,
2080 is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
2081
3ac14b9d 2082#ifdef CONFIG_ARCH_NO_SG_CHAIN
67161779
SR
2083 if (!is_smcd && is_rmb)
2084 /* RMBs are backed by & limited to max size of scatterlists */
3ac14b9d
GW
2085 compressed = min_t(u8, compressed, ilog2((SG_MAX_SINGLE_ALLOC * PAGE_SIZE) >> 14));
2086#endif
67161779 2087
2f6becaf
HW
2088 return compressed;
2089}
2090
2091/* convert the RMB size from compressed notation into integer */
2092int smc_uncompress_bufsize(u8 compressed)
2093{
2094 u32 size;
2095
2096 size = 0x00000001 << (((int)compressed) + 14);
2097 return (int)size;
2098}
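/* Worked example (illustrative numbers, not from the original source):
 * 64 KiB compresses exactly: (65536 - 1) >> 14 = 3, ilog2(3) + 1 = 2,
 * and smc_uncompress_bufsize(2) = 1 << (2 + 14) = 64 KiB again.
 * One byte more (65537) gives 65536 >> 14 = 4, ilog2(4) + 1 = 3,
 * i.e. 128 KiB - rounded up to the next power of 2, so the socket
 * application never gets less than it asked for.
 */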
2099
3e034725
UB
2100/* try to reuse a sndbuf or rmb description slot for a certain
2101 * buffer size; if not available, return NULL
cd6851f3 2102 */
8437bda0 2103static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
aff7bfed 2104 struct rw_semaphore *lock,
8437bda0 2105 struct list_head *buf_list)
cd6851f3 2106{
3e034725 2107 struct smc_buf_desc *buf_slot;
cd6851f3 2108
aff7bfed 2109 down_read(lock);
3e034725
UB
2110 list_for_each_entry(buf_slot, buf_list, list) {
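		/* cmpxchg() returns the previous value of ->used, so 0
		 * means this caller atomically claimed the free slot
		 */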
2111 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
aff7bfed 2112 up_read(lock);
3e034725 2113 return buf_slot;
cd6851f3
UB
2114 }
2115 }
aff7bfed 2116 up_read(lock);
cd6851f3
UB
2117 return NULL;
2118}
2119
952310cc
UB
2120/* one of the conditions for announcing a receiver's current window size is
2121 * that it "results in a minimum increase in the window size of 10% of the
2122 * receive buffer space" [RFC7609]
2123 */
2124static inline int smc_rmb_wnd_update_limit(int rmbe_size)
2125{
6bf536eb 2126 return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
952310cc
UB
2127}
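/* e.g. a 64 KiB RMB gives a limit of 65536 / 10 = 6553 bytes; for
 * small RMBs the SOCK_MIN_SNDBUF / 2 floor dominates instead
 * (illustrative numbers)
 */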
2128
b8d19945 2129/* map a buf to a link */
b9247544
KG
2130static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
2131 struct smc_link *lnk)
2132{
b8d19945
WG
2133 int rc, i, nents, offset, buf_size, size, access_flags;
2134 struct scatterlist *sg;
2135 void *buf;
b9247544
KG
2136
2137 if (buf_desc->is_map_ib[lnk->link_idx])
2138 return 0;
2139
b8d19945
WG
2140 if (buf_desc->is_vm) {
2141 buf = buf_desc->cpu_addr;
2142 buf_size = buf_desc->len;
2143 offset = offset_in_page(buf_desc->cpu_addr);
2144 nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE;
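		/* e.g. a 512 KiB vzalloced buffer with 4 KiB pages needs
		 * 128 entries; vzalloc memory is page-aligned, so offset
		 * is normally 0 here (illustrative numbers)
		 */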
2145 } else {
2146 nents = 1;
2147 }
2148
2149 rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL);
b9247544
KG
2150 if (rc)
2151 return rc;
b8d19945
WG
2152
2153 if (buf_desc->is_vm) {
2154 /* virtually contiguous buffer */
2155 for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) {
2156 size = min_t(int, PAGE_SIZE - offset, buf_size);
2157 sg_set_page(sg, vmalloc_to_page(buf), size, offset);
10bc9761 2158 buf += size;
b8d19945
WG
2159 buf_size -= size;
2160 offset = 0;
2161 }
2162 } else {
2163 /* physically contiguous buffer */
2164 sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
2165 buf_desc->cpu_addr, buf_desc->len);
2166 }
b9247544
KG
2167
2168 /* map sg table to DMA address */
2169 rc = smc_ib_buf_map_sg(lnk, buf_desc,
2170 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2171 /* SMC protocol depends on mapping to one DMA address only */
b8d19945 2172 if (rc != nents) {
b9247544
KG
2173 rc = -EAGAIN;
2174 goto free_table;
2175 }
2176
0ef69e78
GW
2177 buf_desc->is_dma_need_sync |=
2178 smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx;
2179
b8d19945
WG
2180 if (is_rmb || buf_desc->is_vm) {
2181 /* create a new memory region for the RMB or vzalloced sndbuf */
2182 access_flags = is_rmb ?
2183 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
2184 IB_ACCESS_LOCAL_WRITE;
2185
2186 rc = smc_ib_get_memory_region(lnk->roce_pd, access_flags,
b9247544
KG
2187 buf_desc, lnk->link_idx);
2188 if (rc)
2189 goto buf_unmap;
b8d19945
WG
2190 smc_ib_sync_sg_for_device(lnk, buf_desc,
2191 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
b9247544
KG
2192 }
2193 buf_desc->is_map_ib[lnk->link_idx] = true;
2194 return 0;
2195
2196buf_unmap:
2197 smc_ib_buf_unmap_sg(lnk, buf_desc,
2198 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2199free_table:
2200 sg_free_table(&buf_desc->sgt[lnk->link_idx]);
2201 return rc;
2202}
2203
b8d19945 2204/* register a new buf (rmb or vzalloced sndbuf) on the IB device;
d5500667
KG
2205 * must be called under lgr->llc_conf_mutex lock
2206 */
b8d19945 2207int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
7562a13d
KG
2208{
2209 if (list_empty(&link->lgr->list))
2210 return -ENOLINK;
b8d19945
WG
2211 if (!buf_desc->is_reg_mr[link->link_idx]) {
2212 /* register memory region for new buf */
2213 if (buf_desc->is_vm)
2214 buf_desc->mr[link->link_idx]->iova =
2215 (uintptr_t)buf_desc->cpu_addr;
2216 if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) {
2217 buf_desc->is_reg_err = true;
7562a13d
KG
2218 return -EFAULT;
2219 }
b8d19945 2220 buf_desc->is_reg_mr[link->link_idx] = true;
7562a13d
KG
2221 }
2222 return 0;
2223}
2224
aff7bfed 2225static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock,
fb33d277
KG
2226 struct list_head *lst, bool is_rmb)
2227{
2228 struct smc_buf_desc *buf_desc, *bf;
2229 int rc = 0;
2230
aff7bfed 2231 down_write(lock);
fb33d277
KG
2232 list_for_each_entry_safe(buf_desc, bf, lst, list) {
2233 if (!buf_desc->used)
2234 continue;
2235 rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
2236 if (rc)
2237 goto out;
2238 }
2239out:
aff7bfed 2240 up_write(lock);
fb33d277
KG
2241 return rc;
2242}
2243
2244/* map all used buffers of lgr for a new link */
2245int smcr_buf_map_lgr(struct smc_link *lnk)
2246{
2247 struct smc_link_group *lgr = lnk->lgr;
2248 int i, rc = 0;
2249
2250 for (i = 0; i < SMC_RMBE_SIZES; i++) {
2251 rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
2252 &lgr->rmbs[i], true);
2253 if (rc)
2254 return rc;
2255 rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
2256 &lgr->sndbufs[i], false);
2257 if (rc)
2258 return rc;
2259 }
2260 return 0;
2261}
2262
d5500667
KG
2263/* register all used buffers of lgr for a new link,
2264 * must be called under lgr->llc_conf_mutex lock
2265 */
fb33d277
KG
2266int smcr_buf_reg_lgr(struct smc_link *lnk)
2267{
2268 struct smc_link_group *lgr = lnk->lgr;
2269 struct smc_buf_desc *buf_desc, *bf;
2270 int i, rc = 0;
2271
b8d19945 2272 /* reg all RMBs for a new link */
aff7bfed 2273 down_write(&lgr->rmbs_lock);
fb33d277
KG
2274 for (i = 0; i < SMC_RMBE_SIZES; i++) {
2275 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
2276 if (!buf_desc->used)
2277 continue;
b8d19945
WG
2278 rc = smcr_link_reg_buf(lnk, buf_desc);
2279 if (rc) {
aff7bfed 2280 up_write(&lgr->rmbs_lock);
b8d19945
WG
2281 return rc;
2282 }
fb33d277
KG
2283 }
2284 }
aff7bfed 2285 up_write(&lgr->rmbs_lock);
b8d19945
WG
2286
2287 if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
2288 return rc;
2289
2290 /* reg all vzalloced sndbufs for a new link */
aff7bfed 2291 down_write(&lgr->sndbufs_lock);
b8d19945
WG
2292 for (i = 0; i < SMC_RMBE_SIZES; i++) {
2293 list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
2294 if (!buf_desc->used || !buf_desc->is_vm)
2295 continue;
2296 rc = smcr_link_reg_buf(lnk, buf_desc);
2297 if (rc) {
aff7bfed 2298 up_write(&lgr->sndbufs_lock);
b8d19945
WG
2299 return rc;
2300 }
2301 }
2302 }
aff7bfed 2303 up_write(&lgr->sndbufs_lock);
fb33d277
KG
2304 return rc;
2305}
2306
c6ba7c9b 2307static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
0908503a 2308 int bufsize)
b33982c3
UB
2309{
2310 struct smc_buf_desc *buf_desc;
b33982c3
UB
2311
2312 /* try to alloc a new buffer */
2313 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
2314 if (!buf_desc)
2315 return ERR_PTR(-ENOMEM);
2316
b8d19945
WG
2317 switch (lgr->buf_type) {
2318 case SMCR_PHYS_CONT_BUFS:
2319 case SMCR_MIXED_BUFS:
2320 buf_desc->order = get_order(bufsize);
2321 buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
2322 __GFP_NOMEMALLOC | __GFP_COMP |
2323 __GFP_NORETRY | __GFP_ZERO,
2324 buf_desc->order);
2325 if (buf_desc->pages) {
2326 buf_desc->cpu_addr =
2327 (void *)page_address(buf_desc->pages);
2328 buf_desc->len = bufsize;
2329 buf_desc->is_vm = false;
2330 break;
2331 }
2332 if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
2333 goto out;
cd959bf7 2334 fallthrough; // try virtually contiguous buf
b8d19945
WG
2335 case SMCR_VIRT_CONT_BUFS:
2336 buf_desc->order = get_order(bufsize);
2337 buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
2338 if (!buf_desc->cpu_addr)
2339 goto out;
2340 buf_desc->pages = NULL;
2341 buf_desc->len = bufsize;
2342 buf_desc->is_vm = true;
2343 break;
2344 }
b9247544 2345 return buf_desc;
b8d19945
WG
2346
2347out:
2348 kfree(buf_desc);
2349 return ERR_PTR(-EAGAIN);
b9247544 2350}
b33982c3 2351
b9247544
KG
2352/* map buf_desc on all usable links,
2353 * unused buffers stay mapped as long as the link is up
2354 */
2355static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
2356 struct smc_buf_desc *buf_desc, bool is_rmb)
2357{
e738455b 2358 int i, rc = 0, cnt = 0;
b33982c3 2359
d5500667 2360 /* protect against parallel link reconfiguration */
f6421014 2361 down_read(&lgr->llc_conf_mutex);
b9247544
KG
2362 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
2363 struct smc_link *lnk = &lgr->lnk[i];
b33982c3 2364
d854fcbf 2365 if (!smc_link_usable(lnk))
b9247544
KG
2366 continue;
2367 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
b9247544
KG
2368 rc = -ENOMEM;
2369 goto out;
b33982c3 2370 }
e738455b 2371 cnt++;
b33982c3 2372 }
b9247544 2373out:
f6421014 2374 up_read(&lgr->llc_conf_mutex);
e738455b
WG
2375 if (!rc && !cnt)
2376 rc = -EINVAL;
b9247544 2377 return rc;
b33982c3
UB
2378}
2379
c6ba7c9b
HW
2380static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
2381 bool is_dmb, int bufsize)
2382{
2383 struct smc_buf_desc *buf_desc;
2384 int rc;
2385
c6ba7c9b
HW
2386 /* try to alloc a new DMB */
2387 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
2388 if (!buf_desc)
2389 return ERR_PTR(-ENOMEM);
2390 if (is_dmb) {
2391 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
2392 if (rc) {
2393 kfree(buf_desc);
96d6fded
KG
2394 if (rc == -ENOMEM)
2395 return ERR_PTR(-EAGAIN);
2396 if (rc == -ENOSPC)
2397 return ERR_PTR(-ENOSPC);
2398 return ERR_PTR(-EIO);
c6ba7c9b 2399 }
be244f28
HW
2400 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
 2401		/* CDC header is stored in the buf, so pretend it is smaller */
2402 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
c6ba7c9b
HW
2403 } else {
2404 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
2405 __GFP_NOWARN | __GFP_NORETRY |
2406 __GFP_NOMEMALLOC);
2407 if (!buf_desc->cpu_addr) {
2408 kfree(buf_desc);
2409 return ERR_PTR(-EAGAIN);
2410 }
2411 buf_desc->len = bufsize;
2412 }
2413 return buf_desc;
2414}
2415
2416static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
cd6851f3 2417{
8437bda0 2418 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
cd6851f3
UB
2419 struct smc_connection *conn = &smc->conn;
2420 struct smc_link_group *lgr = conn->lgr;
3e034725 2421 struct list_head *buf_list;
833bac7e 2422 int bufsize, bufsize_comp;
aff7bfed 2423 struct rw_semaphore *lock; /* lock buffer list */
e0e4b8fa 2424 bool is_dgraded = false;
cd6851f3 2425
3e034725
UB
2426 if (is_rmb)
2427 /* use socket recv buffer size (w/o overhead) as start value */
833bac7e 2428 bufsize = smc->sk.sk_rcvbuf / 2;
3e034725
UB
2429 else
2430 /* use socket send buffer size (w/o overhead) as start value */
833bac7e 2431 bufsize = smc->sk.sk_sndbuf / 2;
3e034725 2432
833bac7e
GB
2433 for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
2434 bufsize_comp >= 0; bufsize_comp--) {
3e034725
UB
2435 if (is_rmb) {
2436 lock = &lgr->rmbs_lock;
833bac7e 2437 buf_list = &lgr->rmbs[bufsize_comp];
3e034725
UB
2438 } else {
2439 lock = &lgr->sndbufs_lock;
833bac7e 2440 buf_list = &lgr->sndbufs[bufsize_comp];
9d8fb617 2441 }
833bac7e 2442 bufsize = smc_uncompress_bufsize(bufsize_comp);
a3fe3d01 2443
3e034725 2444 /* check for reusable slot in the link group */
833bac7e 2445 buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
3e034725 2446 if (buf_desc) {
0ef69e78 2447 buf_desc->is_dma_need_sync = 0;
e0d10354 2448 SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
194730a9 2449 SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
cd6851f3
UB
2450 break; /* found reusable slot */
2451 }
a3fe3d01 2452
c6ba7c9b
HW
2453 if (is_smcd)
2454 buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
2455 else
0908503a 2456 buf_desc = smcr_new_buf_create(lgr, bufsize);
c6ba7c9b 2457
b33982c3
UB
2458 if (PTR_ERR(buf_desc) == -ENOMEM)
2459 break;
e0e4b8fa
GG
2460 if (IS_ERR(buf_desc)) {
2461 if (!is_dgraded) {
2462 is_dgraded = true;
194730a9 2463 SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
e0e4b8fa 2464 }
a3fe3d01 2465 continue;
e0e4b8fa 2466 }
897e1c24 2467
194730a9 2468 SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
e0d10354 2469 SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
3e034725 2470 buf_desc->used = 1;
aff7bfed 2471 down_write(lock);
d386d59b 2472 smc_lgr_buf_list_add(lgr, is_rmb, buf_list, buf_desc);
aff7bfed 2473 up_write(lock);
3e034725 2474 break; /* found */
cd6851f3 2475 }
3e034725 2476
b33982c3 2477 if (IS_ERR(buf_desc))
72b7f6c4 2478 return PTR_ERR(buf_desc);
3e034725 2479
b9247544
KG
2480 if (!is_smcd) {
2481 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
b8d19945 2482 smcr_buf_unuse(buf_desc, is_rmb, lgr);
b9247544
KG
2483 return -ENOMEM;
2484 }
2485 }
2486
3e034725
UB
2487 if (is_rmb) {
2488 conn->rmb_desc = buf_desc;
833bac7e
GB
2489 conn->rmbe_size_comp = bufsize_comp;
2490 smc->sk.sk_rcvbuf = bufsize * 2;
5f08318f 2491 atomic_set(&conn->bytes_to_rcv, 0);
be244f28
HW
2492 conn->rmbe_update_limit =
2493 smc_rmb_wnd_update_limit(buf_desc->len);
c6ba7c9b
HW
2494 if (is_smcd)
2495 smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
cd6851f3 2496 } else {
3e034725 2497 conn->sndbuf_desc = buf_desc;
833bac7e 2498 smc->sk.sk_sndbuf = bufsize * 2;
3e034725 2499 atomic_set(&conn->sndbuf_space, bufsize);
cd6851f3 2500 }
3e034725
UB
2501 return 0;
2502}
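/* Illustrative flow: if e.g. a 256 KiB buffer cannot be allocated,
 * smcr/smcd_new_buf_create() returns -EAGAIN and the loop above
 * retries at 128 KiB, 64 KiB, ... down to 16 KiB, counting the
 * downgrade once in the stats; -ENOMEM (no buf_desc) ends the
 * search immediately.
 */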
2503
10428dd8
UB
2504void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
2505{
0ef69e78
GW
2506 if (!conn->sndbuf_desc->is_dma_need_sync)
2507 return;
ea89c6c0
WG
2508 if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
2509 !smc_link_active(conn->lnk))
c6ba7c9b 2510 return;
387707fd 2511 smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
10428dd8
UB
2512}
2513
2514void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
2515{
b9247544 2516 int i;
10428dd8 2517
0ef69e78
GW
2518 if (!conn->rmb_desc->is_dma_need_sync)
2519 return;
ea89c6c0 2520 if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
c6ba7c9b 2521 return;
b9247544 2522 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
741a49a4 2523 if (!smc_link_active(&conn->lgr->lnk[i]))
b9247544
KG
2524 continue;
2525 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
2526 DMA_FROM_DEVICE);
2527 }
10428dd8
UB
2528}
2529
3e034725
UB
 2530/* create the send and receive buffers for an SMC socket;
2531 * receive buffers are called RMBs;
2532 * (even though the SMC protocol allows more than one RMB-element per RMB,
2533 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 2534 * extra RMB for every connection in a link group)
2535 */
c6ba7c9b 2536int smc_buf_create(struct smc_sock *smc, bool is_smcd)
3e034725
UB
2537{
2538 int rc;
2539
2540 /* create send buffer */
ae2be35c
WG
2541 if (is_smcd &&
2542 smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd))
2543 goto create_rmb;
2544
c6ba7c9b 2545 rc = __smc_buf_create(smc, is_smcd, false);
3e034725
UB
2546 if (rc)
2547 return rc;
ae2be35c
WG
2548
2549create_rmb:
3e034725 2550 /* create rmb */
c6ba7c9b 2551 rc = __smc_buf_create(smc, is_smcd, true);
ae2be35c 2552 if (rc && smc->conn.sndbuf_desc) {
aff7bfed 2553 down_write(&smc->conn.lgr->sndbufs_lock);
d386d59b
WG
2554 smc_lgr_buf_list_del(smc->conn.lgr, false,
2555 smc->conn.sndbuf_desc);
aff7bfed 2556 up_write(&smc->conn.lgr->sndbufs_lock);
6511aad3 2557 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
1d8df41d 2558 smc->conn.sndbuf_desc = NULL;
fd7f3a74 2559 }
3e034725 2560 return rc;
cd6851f3 2561}
bd4ad577 2562
ae2be35c
WG
2563int smcd_buf_attach(struct smc_sock *smc)
2564{
2565 struct smc_connection *conn = &smc->conn;
2566 struct smcd_dev *smcd = conn->lgr->smcd;
2567 u64 peer_token = conn->peer_token;
2568 struct smc_buf_desc *buf_desc;
2569 int rc;
2570
2571 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
2572 if (!buf_desc)
2573 return -ENOMEM;
2574
2575 /* The ghost sndbuf_desc describes the same memory region as
 2576	 * the peer RMB. Its lifecycle is tied to the connection's,
 2577	 * and it will be freed with the connection instead of the
2578 * link group.
2579 */
2580 rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
2581 if (rc)
2582 goto free;
2583
2584 smc->sk.sk_sndbuf = buf_desc->len;
2585 buf_desc->cpu_addr =
2586 (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
2587 buf_desc->len -= sizeof(struct smcd_cdc_msg);
2588 conn->sndbuf_desc = buf_desc;
2589 conn->sndbuf_desc->used = 1;
2590 atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
2591 return 0;
2592
2593free:
2594 kfree(buf_desc);
2595 return rc;
2596}
2597
bd4ad577
UB
2598static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
2599{
2600 int i;
2601
2602 for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
2603 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
2604 return i;
2605 }
2606 return -ENOSPC;
2607}
2608
ba21abd2
KG
2609static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
2610 u32 rkey)
2611{
2612 int i;
2613
2614 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
2615 if (test_bit(i, lgr->rtokens_used_mask) &&
2616 lgr->rtokens[i][lnk_idx].rkey == rkey)
2617 return i;
2618 }
2619 return -ENOENT;
2620}
2621
2622/* set rtoken for a new link to an existing rmb */
2623void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
2624 __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
2625{
2626 int rtok_idx;
2627
2628 rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
2629 if (rtok_idx == -ENOENT)
2630 return;
2631 lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
2632 lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
2633}
2634
2635/* set rtoken for a new link whose link_id is given */
2636void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
2637 __be64 nw_vaddr, __be32 nw_rkey)
2638{
2639 u64 dma_addr = be64_to_cpu(nw_vaddr);
2640 u32 rkey = ntohl(nw_rkey);
2641 bool found = false;
2642 int link_idx;
2643
2644 for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
2645 if (lgr->lnk[link_idx].link_id == link_id) {
2646 found = true;
2647 break;
2648 }
2649 }
2650 if (!found)
2651 return;
2652 lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
2653 lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
2654}
2655
4ed75de5 2656/* add a new rtoken from peer */
387707fd 2657int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
bd4ad577 2658{
387707fd 2659 struct smc_link_group *lgr = smc_get_lgr(lnk);
4ed75de5
KG
2660 u64 dma_addr = be64_to_cpu(nw_vaddr);
2661 u32 rkey = ntohl(nw_rkey);
bd4ad577
UB
2662 int i;
2663
2664 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
387707fd
KG
2665 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
2666 lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
bd4ad577 2667 test_bit(i, lgr->rtokens_used_mask)) {
4ed75de5
KG
2668 /* already in list */
2669 return i;
2670 }
2671 }
2672 i = smc_rmb_reserve_rtoken_idx(lgr);
2673 if (i < 0)
2674 return i;
387707fd
KG
2675 lgr->rtokens[i][lnk->link_idx].rkey = rkey;
2676 lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
4ed75de5
KG
2677 return i;
2678}
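/* Layout sketch (derived from the indexing above): lgr->rtokens is a
 * [SMC_RMBS_PER_LGR_MAX][SMC_LINKS_PER_LGR_MAX] matrix - one row per
 * remote RMB, one column per link - with rtokens_used_mask marking
 * the rows in use; smc_rtoken_set()/smc_rtoken_set2() copy a row's
 * entry over to a newly added link.
 */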
2679
e07d31dc 2680/* delete an rtoken from all links */
387707fd 2681int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
4ed75de5 2682{
387707fd 2683 struct smc_link_group *lgr = smc_get_lgr(lnk);
4ed75de5 2684 u32 rkey = ntohl(nw_rkey);
e07d31dc 2685 int i, j;
4ed75de5
KG
2686
2687 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
387707fd 2688 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
4ed75de5 2689 test_bit(i, lgr->rtokens_used_mask)) {
e07d31dc
KG
2690 for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
2691 lgr->rtokens[i][j].rkey = 0;
2692 lgr->rtokens[i][j].dma_addr = 0;
2693 }
4ed75de5 2694 clear_bit(i, lgr->rtokens_used_mask);
bd4ad577
UB
2695 return 0;
2696 }
2697 }
4ed75de5
KG
2698 return -ENOENT;
2699}
2700
2701/* save rkey and dma_addr received from peer during clc handshake */
2702int smc_rmb_rtoken_handling(struct smc_connection *conn,
e07d31dc 2703 struct smc_link *lnk,
4ed75de5
KG
2704 struct smc_clc_msg_accept_confirm *clc)
2705{
3d9725a6
UB
2706 conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
2707 clc->r0.rmb_rkey);
bd4ad577
UB
2708 if (conn->rtoken_idx < 0)
2709 return conn->rtoken_idx;
bd4ad577
UB
2710 return 0;
2711}
9fda3510 2712
c3d9494e
UB
2713static void smc_core_going_away(void)
2714{
2715 struct smc_ib_device *smcibdev;
2716 struct smcd_dev *smcd;
2717
92f3cb0e 2718 mutex_lock(&smc_ib_devices.mutex);
c3d9494e
UB
2719 list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
2720 int i;
2721
2722 for (i = 0; i < SMC_MAX_PORTS; i++)
2723 set_bit(i, smcibdev->ports_going_away);
2724 }
92f3cb0e 2725 mutex_unlock(&smc_ib_devices.mutex);
c3d9494e 2726
82087c03 2727 mutex_lock(&smcd_dev_list.mutex);
c3d9494e
UB
2728 list_for_each_entry(smcd, &smcd_dev_list.list, list) {
2729 smcd->going_away = 1;
2730 }
82087c03 2731 mutex_unlock(&smcd_dev_list.mutex);
c3d9494e
UB
2732}
2733
5421ec28
UB
2734/* Clean up all SMC link groups */
2735static void smc_lgrs_shutdown(void)
9fda3510 2736{
a2351c5d 2737 struct smcd_dev *smcd;
9fda3510 2738
c3d9494e
UB
2739 smc_core_going_away();
2740
0b29ec64 2741 smc_smcr_terminate_all(NULL);
a2351c5d 2742
82087c03 2743 mutex_lock(&smcd_dev_list.mutex);
a2351c5d 2744 list_for_each_entry(smcd, &smcd_dev_list.list, list)
5421ec28 2745 smc_smcd_terminate_all(smcd);
82087c03 2746 mutex_unlock(&smcd_dev_list.mutex);
9fda3510 2747}
5421ec28 2748
a33a803c
UB
2749static int smc_core_reboot_event(struct notifier_block *this,
2750 unsigned long event, void *ptr)
2751{
2752 smc_lgrs_shutdown();
28a3b840 2753 smc_ib_unregister_client();
8747716f 2754 smc_ism_exit();
a33a803c
UB
2755 return 0;
2756}
2757
2758static struct notifier_block smc_reboot_notifier = {
2759 .notifier_call = smc_core_reboot_event,
2760};
2761
6dabd405
UB
2762int __init smc_core_init(void)
2763{
a33a803c 2764 return register_reboot_notifier(&smc_reboot_notifier);
6dabd405
UB
2765}
2766
5421ec28
UB
2767/* Called (from smc_exit) when module is removed */
2768void smc_core_exit(void)
2769{
a33a803c 2770 unregister_reboot_notifier(&smc_reboot_notifier);
5421ec28
UB
2771 smc_lgrs_shutdown();
2772}