Merge branch 'x86-mds-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-block.git] / net / smc / smc_core.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
0cfdd8f9
UB
2/*
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
4 *
5 * Basic Transport Functions exploiting Infiniband API
6 *
7 * Copyright IBM Corp. 2016
8 *
9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
10 */
11
12#include <linux/socket.h>
13#include <linux/if_vlan.h>
14#include <linux/random.h>
15#include <linux/workqueue.h>
16#include <net/tcp.h>
17#include <net/sock.h>
18#include <rdma/ib_verbs.h>
ddb457c6 19#include <rdma/ib_cache.h>
0cfdd8f9
UB
20
21#include "smc.h"
22#include "smc_clc.h"
23#include "smc_core.h"
24#include "smc_ib.h"
f38ba179 25#include "smc_wr.h"
9bf9abea 26#include "smc_llc.h"
5f08318f 27#include "smc_cdc.h"
b38d7324 28#include "smc_close.h"
c6ba7c9b 29#include "smc_ism.h"
0cfdd8f9 30
5bc11ddb
UB
31#define SMC_LGR_NUM_INCR 256
32#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
7f58a1ad 33#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
0d18a0cb 34#define SMC_LGR_FREE_DELAY_FAST (8 * HZ)
0cfdd8f9 35
9fda3510
HW
36static struct smc_lgr_list smc_lgr_list = { /* established link groups */
37 .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
38 .list = LIST_HEAD_INIT(smc_lgr_list.list),
39 .num = 0,
40};
9bf9abea 41
6511aad3
HW
42static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
43 struct smc_buf_desc *buf_desc);
a6920d1d 44
97cdbc42
KG
45static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
46{
47 /* client link group creation always follows the server link group
48 * creation. For client use a somewhat higher removal delay time,
49 * otherwise there is a risk of out-of-sync link groups.
50 */
51 mod_delayed_work(system_wq, &lgr->free_work,
c6ba7c9b
HW
52 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
53 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
97cdbc42
KG
54}
55
0d18a0cb
KG
56void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
57{
58 mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
97cdbc42
KG
59}
60
0cfdd8f9
UB
61/* Register connection's alert token in our lookup structure.
62 * To use rbtrees we have to implement our own insert core.
63 * Requires @conns_lock
64 * @smc connection to register
65 * Returns 0 on success, != otherwise.
66 */
67static void smc_lgr_add_alert_token(struct smc_connection *conn)
68{
69 struct rb_node **link, *parent = NULL;
70 u32 token = conn->alert_token_local;
71
72 link = &conn->lgr->conns_all.rb_node;
73 while (*link) {
74 struct smc_connection *cur = rb_entry(*link,
75 struct smc_connection, alert_node);
76
77 parent = *link;
78 if (cur->alert_token_local > token)
79 link = &parent->rb_left;
80 else
81 link = &parent->rb_right;
82 }
83 /* Put the new node there */
84 rb_link_node(&conn->alert_node, parent, link);
85 rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
86}
87
88/* Register connection in link group by assigning an alert token
89 * registered in a search tree.
90 * Requires @conns_lock
91 * Note that '0' is a reserved value and not assigned.
92 */
93static void smc_lgr_register_conn(struct smc_connection *conn)
94{
95 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
96 static atomic_t nexttoken = ATOMIC_INIT(0);
97
98 /* find a new alert_token_local value not yet used by some connection
99 * in this link group
100 */
101 sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
102 while (!conn->alert_token_local) {
103 conn->alert_token_local = atomic_inc_return(&nexttoken);
104 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
105 conn->alert_token_local = 0;
106 }
107 smc_lgr_add_alert_token(conn);
108 conn->lgr->conns_num++;
109}
110
111/* Unregister connection and reset the alert token of the given connection<
112 */
113static void __smc_lgr_unregister_conn(struct smc_connection *conn)
114{
115 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
116 struct smc_link_group *lgr = conn->lgr;
117
118 rb_erase(&conn->alert_node, &lgr->conns_all);
119 lgr->conns_num--;
120 conn->alert_token_local = 0;
0cfdd8f9
UB
121 sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
122}
123
fb692ec4 124/* Unregister connection from lgr
0cfdd8f9
UB
125 */
126static void smc_lgr_unregister_conn(struct smc_connection *conn)
127{
128 struct smc_link_group *lgr = conn->lgr;
0cfdd8f9 129
77f838ac
KG
130 if (!lgr)
131 return;
0cfdd8f9
UB
132 write_lock_bh(&lgr->conns_lock);
133 if (conn->alert_token_local) {
0cfdd8f9
UB
134 __smc_lgr_unregister_conn(conn);
135 }
136 write_unlock_bh(&lgr->conns_lock);
0cfdd8f9
UB
137}
138
0d18a0cb
KG
139/* Send delete link, either as client to request the initiation
140 * of the DELETE LINK sequence from server; or as server to
141 * initiate the delete processing. See smc_llc_rx_delete_link().
142 */
143static int smc_link_send_delete(struct smc_link *lnk)
144{
145 if (lnk->state == SMC_LNK_ACTIVE &&
146 !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) {
147 smc_llc_link_deleting(lnk);
148 return 0;
149 }
150 return -ENOTCONN;
151}
152
3f3f0e36
UB
153static void smc_lgr_free(struct smc_link_group *lgr);
154
0cfdd8f9
UB
155static void smc_lgr_free_work(struct work_struct *work)
156{
157 struct smc_link_group *lgr = container_of(to_delayed_work(work),
158 struct smc_link_group,
159 free_work);
160 bool conns;
161
162 spin_lock_bh(&smc_lgr_list.lock);
163 read_lock_bh(&lgr->conns_lock);
164 conns = RB_EMPTY_ROOT(&lgr->conns_all);
165 read_unlock_bh(&lgr->conns_lock);
166 if (!conns) { /* number of lgr connections is no longer zero */
167 spin_unlock_bh(&smc_lgr_list.lock);
168 return;
169 }
e78b2622
KG
170 if (!list_empty(&lgr->list))
171 list_del_init(&lgr->list); /* remove from smc_lgr_list */
0cfdd8f9 172 spin_unlock_bh(&smc_lgr_list.lock);
0d18a0cb
KG
173
174 if (!lgr->is_smcd && !lgr->terminating) {
90d8b29c
UB
175 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
176
0d18a0cb 177 /* try to send del link msg, on error free lgr immediately */
90d8b29c
UB
178 if (lnk->state == SMC_LNK_ACTIVE &&
179 !smc_link_send_delete(lnk)) {
0d18a0cb
KG
180 /* reschedule in case we never receive a response */
181 smc_lgr_schedule_free_work(lgr);
182 return;
183 }
184 }
185
3cf52eb1 186 if (!delayed_work_pending(&lgr->free_work)) {
0d18a0cb
KG
187 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
188
189 if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
190 smc_llc_link_inactive(lnk);
0512f69e
HW
191 if (lgr->is_smcd)
192 smc_ism_signal_shutdown(lgr);
268ffcc4 193 smc_lgr_free(lgr);
3cf52eb1 194 }
0cfdd8f9
UB
195}
196
197/* create a new SMC link group */
bc36d2fc 198static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
0cfdd8f9
UB
199{
200 struct smc_link_group *lgr;
201 struct smc_link *lnk;
202 u8 rndvec[3];
203 int rc = 0;
cd6851f3 204 int i;
0cfdd8f9 205
bc36d2fc 206 if (ini->is_smcd && ini->vlan_id) {
7a62725a
KG
207 if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
208 rc = SMC_CLC_DECL_ISMVLANERR;
c6ba7c9b 209 goto out;
7a62725a 210 }
c6ba7c9b
HW
211 }
212
0cfdd8f9
UB
213 lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
214 if (!lgr) {
7a62725a 215 rc = SMC_CLC_DECL_MEM;
0cfdd8f9
UB
216 goto out;
217 }
bc36d2fc 218 lgr->is_smcd = ini->is_smcd;
517c300e 219 lgr->sync_err = 0;
bc36d2fc 220 lgr->vlan_id = ini->vlan_id;
cd6851f3
UB
221 rwlock_init(&lgr->sndbufs_lock);
222 rwlock_init(&lgr->rmbs_lock);
c6ba7c9b 223 rwlock_init(&lgr->conns_lock);
cd6851f3
UB
224 for (i = 0; i < SMC_RMBE_SIZES; i++) {
225 INIT_LIST_HEAD(&lgr->sndbufs[i]);
226 INIT_LIST_HEAD(&lgr->rmbs[i]);
227 }
9fda3510
HW
228 smc_lgr_list.num += SMC_LGR_NUM_INCR;
229 memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
0cfdd8f9
UB
230 INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
231 lgr->conns_all = RB_ROOT;
bc36d2fc 232 if (ini->is_smcd) {
c6ba7c9b 233 /* SMC-D specific settings */
bc36d2fc
KG
234 lgr->peer_gid = ini->ism_gid;
235 lgr->smcd = ini->ism_dev;
c6ba7c9b
HW
236 } else {
237 /* SMC-R specific settings */
238 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
bc36d2fc
KG
239 memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
240 SMC_SYSTEMID_LEN);
c6ba7c9b
HW
241
242 lnk = &lgr->lnk[SMC_SINGLE_LINK];
243 /* initialize link */
244 lnk->state = SMC_LNK_ACTIVATING;
245 lnk->link_id = SMC_SINGLE_LINK;
bc36d2fc
KG
246 lnk->smcibdev = ini->ib_dev;
247 lnk->ibport = ini->ib_port;
248 lnk->path_mtu =
249 ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
250 if (!ini->ib_dev->initialized)
251 smc_ib_setup_per_ibdev(ini->ib_dev);
c6ba7c9b
HW
252 get_random_bytes(rndvec, sizeof(rndvec));
253 lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
254 (rndvec[2] << 16);
7005ada6 255 rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
bc36d2fc
KG
256 ini->vlan_id, lnk->gid,
257 &lnk->sgid_index);
7005ada6
UB
258 if (rc)
259 goto free_lgr;
c6ba7c9b
HW
260 rc = smc_llc_link_init(lnk);
261 if (rc)
262 goto free_lgr;
263 rc = smc_wr_alloc_link_mem(lnk);
264 if (rc)
265 goto clear_llc_lnk;
266 rc = smc_ib_create_protection_domain(lnk);
267 if (rc)
268 goto free_link_mem;
269 rc = smc_ib_create_queue_pair(lnk);
270 if (rc)
271 goto dealloc_pd;
272 rc = smc_wr_create_link(lnk);
273 if (rc)
274 goto destroy_qp;
275 }
0cfdd8f9 276 smc->conn.lgr = lgr;
0cfdd8f9
UB
277 spin_lock_bh(&smc_lgr_list.lock);
278 list_add(&lgr->list, &smc_lgr_list.list);
279 spin_unlock_bh(&smc_lgr_list.lock);
f38ba179
UB
280 return 0;
281
bd4ad577
UB
282destroy_qp:
283 smc_ib_destroy_queue_pair(lnk);
284dealloc_pd:
285 smc_ib_dealloc_protection_domain(lnk);
286free_link_mem:
287 smc_wr_free_link_mem(lnk);
2a4c57a9
KG
288clear_llc_lnk:
289 smc_llc_link_clear(lnk);
f38ba179
UB
290free_lgr:
291 kfree(lgr);
0cfdd8f9 292out:
7a62725a
KG
293 if (rc < 0) {
294 if (rc == -ENOMEM)
295 rc = SMC_CLC_DECL_MEM;
296 else
297 rc = SMC_CLC_DECL_INTERR;
298 }
0cfdd8f9
UB
299 return rc;
300}
301
fb692ec4
KG
302static void smc_buf_unuse(struct smc_connection *conn,
303 struct smc_link_group *lgr)
cd6851f3 304{
69cb7dc0 305 if (conn->sndbuf_desc)
cd6851f3 306 conn->sndbuf_desc->used = 0;
cd6851f3 307 if (conn->rmb_desc) {
a6920d1d 308 if (!conn->rmb_desc->regerr) {
c7674c00
KG
309 if (!lgr->is_smcd) {
310 /* unregister rmb with peer */
311 smc_llc_do_delete_rkey(
312 &lgr->lnk[SMC_SINGLE_LINK],
313 conn->rmb_desc);
314 }
a5e04318 315 conn->rmb_desc->used = 0;
a6920d1d
KG
316 } else {
317 /* buf registration failed, reuse not possible */
a6920d1d
KG
318 write_lock_bh(&lgr->rmbs_lock);
319 list_del(&conn->rmb_desc->list);
320 write_unlock_bh(&lgr->rmbs_lock);
321
6511aad3 322 smc_buf_free(lgr, true, conn->rmb_desc);
a6920d1d 323 }
cd6851f3
UB
324 }
325}
326
0cfdd8f9
UB
327/* remove a finished connection from its link group */
328void smc_conn_free(struct smc_connection *conn)
329{
fb692ec4
KG
330 struct smc_link_group *lgr = conn->lgr;
331
332 if (!lgr)
0cfdd8f9 333 return;
fb692ec4 334 if (lgr->is_smcd) {
c6ba7c9b 335 smc_ism_unset_conn(conn);
be244f28
HW
336 tasklet_kill(&conn->rx_tsklet);
337 } else {
c6ba7c9b 338 smc_cdc_tx_dismiss_slots(conn);
be244f28 339 }
8fc002b0 340 smc_lgr_unregister_conn(conn);
fb692ec4 341 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
8fc002b0 342 conn->lgr = NULL;
fb692ec4
KG
343
344 if (!lgr->conns_num)
345 smc_lgr_schedule_free_work(lgr);
0cfdd8f9
UB
346}
347
348static void smc_link_clear(struct smc_link *lnk)
349{
350 lnk->peer_qpn = 0;
2a4c57a9 351 smc_llc_link_clear(lnk);
bd4ad577 352 smc_ib_modify_qp_reset(lnk);
f38ba179 353 smc_wr_free_link(lnk);
bd4ad577
UB
354 smc_ib_destroy_queue_pair(lnk);
355 smc_ib_dealloc_protection_domain(lnk);
f38ba179 356 smc_wr_free_link_mem(lnk);
0cfdd8f9
UB
357}
358
c6ba7c9b
HW
359static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
360 struct smc_buf_desc *buf_desc)
cd6851f3 361{
6511aad3
HW
362 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
363
3e034725
UB
364 if (is_rmb) {
365 if (buf_desc->mr_rx[SMC_SINGLE_LINK])
366 smc_ib_put_memory_region(
367 buf_desc->mr_rx[SMC_SINGLE_LINK]);
368 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
369 DMA_FROM_DEVICE);
370 } else {
371 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
372 DMA_TO_DEVICE);
cd6851f3 373 }
3e034725 374 sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
2ef4f27a
SR
375 if (buf_desc->pages)
376 __free_pages(buf_desc->pages, buf_desc->order);
3e034725 377 kfree(buf_desc);
cd6851f3
UB
378}
379
c6ba7c9b
HW
380static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
381 struct smc_buf_desc *buf_desc)
382{
be244f28
HW
383 if (is_dmb) {
384 /* restore original buf len */
385 buf_desc->len += sizeof(struct smcd_cdc_msg);
c6ba7c9b 386 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
be244f28 387 } else {
c6ba7c9b 388 kfree(buf_desc->cpu_addr);
be244f28 389 }
c6ba7c9b
HW
390 kfree(buf_desc);
391}
392
393static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
394 struct smc_buf_desc *buf_desc)
395{
396 if (lgr->is_smcd)
397 smcd_buf_free(lgr, is_rmb, buf_desc);
398 else
399 smcr_buf_free(lgr, is_rmb, buf_desc);
400}
401
3e034725 402static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
cd6851f3 403{
3e034725
UB
404 struct smc_buf_desc *buf_desc, *bf_desc;
405 struct list_head *buf_list;
cd6851f3
UB
406 int i;
407
408 for (i = 0; i < SMC_RMBE_SIZES; i++) {
3e034725
UB
409 if (is_rmb)
410 buf_list = &lgr->rmbs[i];
411 else
412 buf_list = &lgr->sndbufs[i];
413 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
cd6851f3 414 list) {
3e034725 415 list_del(&buf_desc->list);
6511aad3 416 smc_buf_free(lgr, is_rmb, buf_desc);
cd6851f3
UB
417 }
418 }
419}
420
3e034725
UB
421static void smc_lgr_free_bufs(struct smc_link_group *lgr)
422{
423 /* free send buffers */
424 __smc_lgr_free_bufs(lgr, false);
425 /* free rmbs */
426 __smc_lgr_free_bufs(lgr, true);
427}
428
0cfdd8f9 429/* remove a link group */
3f3f0e36 430static void smc_lgr_free(struct smc_link_group *lgr)
0cfdd8f9 431{
3e034725 432 smc_lgr_free_bufs(lgr);
c6ba7c9b
HW
433 if (lgr->is_smcd)
434 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
435 else
436 smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
0cfdd8f9
UB
437 kfree(lgr);
438}
439
9651b934
KG
440void smc_lgr_forget(struct smc_link_group *lgr)
441{
442 spin_lock_bh(&smc_lgr_list.lock);
443 /* do not use this link group for new connections */
444 if (!list_empty(&lgr->list))
445 list_del_init(&lgr->list);
446 spin_unlock_bh(&smc_lgr_list.lock);
447}
448
0cfdd8f9 449/* terminate linkgroup abnormally */
b9f227c3 450static void __smc_lgr_terminate(struct smc_link_group *lgr)
0cfdd8f9
UB
451{
452 struct smc_connection *conn;
b38d7324 453 struct smc_sock *smc;
0cfdd8f9
UB
454 struct rb_node *node;
455
517c300e
KG
456 if (lgr->terminating)
457 return; /* lgr already terminating */
458 lgr->terminating = 1;
b9f227c3
HW
459 if (!list_empty(&lgr->list)) /* forget lgr */
460 list_del_init(&lgr->list);
c6ba7c9b
HW
461 if (!lgr->is_smcd)
462 smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
0cfdd8f9
UB
463
464 write_lock_bh(&lgr->conns_lock);
465 node = rb_first(&lgr->conns_all);
466 while (node) {
467 conn = rb_entry(node, struct smc_connection, alert_node);
b38d7324 468 smc = container_of(conn, struct smc_sock, conn);
51f1de79 469 sock_hold(&smc->sk); /* sock_put in close work */
732720fa 470 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
0cfdd8f9 471 __smc_lgr_unregister_conn(conn);
8fc002b0 472 conn->lgr = NULL;
732720fa 473 write_unlock_bh(&lgr->conns_lock);
51f1de79
UB
474 if (!schedule_work(&conn->close_work))
475 sock_put(&smc->sk);
732720fa 476 write_lock_bh(&lgr->conns_lock);
0cfdd8f9
UB
477 node = rb_first(&lgr->conns_all);
478 }
479 write_unlock_bh(&lgr->conns_lock);
c6ba7c9b
HW
480 if (!lgr->is_smcd)
481 wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
97cdbc42 482 smc_lgr_schedule_free_work(lgr);
0cfdd8f9
UB
483}
484
b9f227c3
HW
485void smc_lgr_terminate(struct smc_link_group *lgr)
486{
487 spin_lock_bh(&smc_lgr_list.lock);
488 __smc_lgr_terminate(lgr);
489 spin_unlock_bh(&smc_lgr_list.lock);
490}
491
9fda3510
HW
492/* Called when IB port is terminated */
493void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
494{
495 struct smc_link_group *lgr, *l;
496
b9f227c3 497 spin_lock_bh(&smc_lgr_list.lock);
9fda3510 498 list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
c6ba7c9b
HW
499 if (!lgr->is_smcd &&
500 lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
9fda3510 501 lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
b9f227c3 502 __smc_lgr_terminate(lgr);
9fda3510 503 }
b9f227c3 504 spin_unlock_bh(&smc_lgr_list.lock);
9fda3510
HW
505}
506
c6ba7c9b 507/* Called when SMC-D device is terminated or peer is lost */
0512f69e 508void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
c6ba7c9b
HW
509{
510 struct smc_link_group *lgr, *l;
511 LIST_HEAD(lgr_free_list);
512
513 /* run common cleanup function and build free list */
514 spin_lock_bh(&smc_lgr_list.lock);
515 list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
516 if (lgr->is_smcd && lgr->smcd == dev &&
517 (!peer_gid || lgr->peer_gid == peer_gid) &&
0512f69e 518 (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
c6ba7c9b
HW
519 __smc_lgr_terminate(lgr);
520 list_move(&lgr->list, &lgr_free_list);
521 }
522 }
523 spin_unlock_bh(&smc_lgr_list.lock);
524
525 /* cancel the regular free workers and actually free lgrs */
526 list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
527 list_del_init(&lgr->list);
528 cancel_delayed_work_sync(&lgr->free_work);
0512f69e
HW
529 if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
530 smc_ism_signal_shutdown(lgr);
c6ba7c9b
HW
531 smc_lgr_free(lgr);
532 }
533}
534
0cfdd8f9
UB
535/* Determine vlan of internal TCP socket.
536 * @vlan_id: address to store the determined vlan id into
537 */
bc36d2fc 538int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
0cfdd8f9
UB
539{
540 struct dst_entry *dst = sk_dst_get(clcsock->sk);
cb9d43f6
UB
541 struct net_device *ndev;
542 int i, nest_lvl, rc = 0;
0cfdd8f9 543
bc36d2fc 544 ini->vlan_id = 0;
0cfdd8f9
UB
545 if (!dst) {
546 rc = -ENOTCONN;
547 goto out;
548 }
549 if (!dst->dev) {
550 rc = -ENODEV;
551 goto out_rel;
552 }
553
cb9d43f6
UB
554 ndev = dst->dev;
555 if (is_vlan_dev(ndev)) {
bc36d2fc 556 ini->vlan_id = vlan_dev_vlan_id(ndev);
cb9d43f6
UB
557 goto out_rel;
558 }
559
560 rtnl_lock();
561 nest_lvl = dev_get_nest_level(ndev);
562 for (i = 0; i < nest_lvl; i++) {
563 struct list_head *lower = &ndev->adj_list.lower;
564
565 if (list_empty(lower))
566 break;
567 lower = lower->next;
568 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
569 if (is_vlan_dev(ndev)) {
bc36d2fc 570 ini->vlan_id = vlan_dev_vlan_id(ndev);
cb9d43f6
UB
571 break;
572 }
573 }
574 rtnl_unlock();
0cfdd8f9
UB
575
576out_rel:
577 dst_release(dst);
578out:
579 return rc;
580}
581
c6ba7c9b
HW
582static bool smcr_lgr_match(struct smc_link_group *lgr,
583 struct smc_clc_msg_local *lcl,
ee05ff7a 584 enum smc_lgr_role role, u32 clcqpn)
0cfdd8f9 585{
c6ba7c9b
HW
586 return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
587 SMC_SYSTEMID_LEN) &&
588 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
589 SMC_GID_SIZE) &&
590 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
591 sizeof(lcl->mac)) &&
ee05ff7a
KG
592 lgr->role == role &&
593 (lgr->role == SMC_SERV ||
594 lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
c6ba7c9b 595}
0cfdd8f9 596
c6ba7c9b
HW
597static bool smcd_lgr_match(struct smc_link_group *lgr,
598 struct smcd_dev *smcismdev, u64 peer_gid)
599{
600 return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
0cfdd8f9
UB
601}
602
603/* create a new SMC connection (and a new link group if necessary) */
bc36d2fc 604int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
0cfdd8f9
UB
605{
606 struct smc_connection *conn = &smc->conn;
607 struct smc_link_group *lgr;
0cfdd8f9 608 enum smc_lgr_role role;
0cfdd8f9
UB
609 int rc = 0;
610
7a62725a 611 ini->cln_first_contact = SMC_FIRST_CONTACT;
0cfdd8f9 612 role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
bc36d2fc 613 if (role == SMC_CLNT && ini->srv_first_contact)
0cfdd8f9
UB
614 /* create new link group as well */
615 goto create;
616
617 /* determine if an existing link group can be reused */
618 spin_lock_bh(&smc_lgr_list.lock);
619 list_for_each_entry(lgr, &smc_lgr_list.list, list) {
620 write_lock_bh(&lgr->conns_lock);
bc36d2fc
KG
621 if ((ini->is_smcd ?
622 smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
623 smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
0cfdd8f9 624 !lgr->sync_err &&
bc36d2fc 625 lgr->vlan_id == ini->vlan_id &&
c6ba7c9b
HW
626 (role == SMC_CLNT ||
627 lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
0cfdd8f9 628 /* link group found */
7a62725a 629 ini->cln_first_contact = SMC_REUSE_CONTACT;
0cfdd8f9
UB
630 conn->lgr = lgr;
631 smc_lgr_register_conn(conn); /* add smc conn to lgr */
77f838ac
KG
632 if (delayed_work_pending(&lgr->free_work))
633 cancel_delayed_work(&lgr->free_work);
0cfdd8f9
UB
634 write_unlock_bh(&lgr->conns_lock);
635 break;
636 }
637 write_unlock_bh(&lgr->conns_lock);
638 }
639 spin_unlock_bh(&smc_lgr_list.lock);
640
bc36d2fc 641 if (role == SMC_CLNT && !ini->srv_first_contact &&
7a62725a 642 ini->cln_first_contact == SMC_FIRST_CONTACT) {
0cfdd8f9
UB
643 /* Server reuses a link group, but Client wants to start
644 * a new one
645 * send out_of_sync decline, reason synchr. error
646 */
7a62725a 647 return SMC_CLC_DECL_SYNCERR;
0cfdd8f9
UB
648 }
649
650create:
7a62725a 651 if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
bc36d2fc 652 rc = smc_lgr_create(smc, ini);
0cfdd8f9
UB
653 if (rc)
654 goto out;
655 smc_lgr_register_conn(conn); /* add smc conn to lgr */
0cfdd8f9 656 }
5f08318f 657 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
cbba07a7 658 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
de8474eb 659 conn->urg_state = SMC_URG_READ;
bc36d2fc 660 if (ini->is_smcd) {
be244f28
HW
661 conn->rx_off = sizeof(struct smcd_cdc_msg);
662 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
663 }
5f08318f
UB
664#ifndef KERNEL_HAS_ATOMIC64
665 spin_lock_init(&conn->acurs_lock);
666#endif
0cfdd8f9
UB
667
668out:
7a62725a 669 return rc;
0cfdd8f9 670}
cd6851f3 671
2f6becaf
HW
672/* convert the RMB size into the compressed notation - minimum 16K.
673 * In contrast to plain ilog2, this rounds towards the next power of 2,
674 * so the socket application gets at least its desired sndbuf / rcvbuf size.
675 */
676static u8 smc_compress_bufsize(int size)
677{
678 u8 compressed;
679
680 if (size <= SMC_BUF_MIN_SIZE)
681 return 0;
682
683 size = (size - 1) >> 14;
684 compressed = ilog2(size) + 1;
685 if (compressed >= SMC_RMBE_SIZES)
686 compressed = SMC_RMBE_SIZES - 1;
687 return compressed;
688}
689
690/* convert the RMB size from compressed notation into integer */
691int smc_uncompress_bufsize(u8 compressed)
692{
693 u32 size;
694
695 size = 0x00000001 << (((int)compressed) + 14);
696 return (int)size;
697}
698
3e034725
UB
699/* try to reuse a sndbuf or rmb description slot for a certain
700 * buffer size; if not available, return NULL
cd6851f3 701 */
8437bda0
HW
702static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
703 rwlock_t *lock,
704 struct list_head *buf_list)
cd6851f3 705{
3e034725 706 struct smc_buf_desc *buf_slot;
cd6851f3 707
3e034725
UB
708 read_lock_bh(lock);
709 list_for_each_entry(buf_slot, buf_list, list) {
710 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
711 read_unlock_bh(lock);
712 return buf_slot;
cd6851f3
UB
713 }
714 }
3e034725 715 read_unlock_bh(lock);
cd6851f3
UB
716 return NULL;
717}
718
952310cc
UB
719/* one of the conditions for announcing a receiver's current window size is
720 * that it "results in a minimum increase in the window size of 10% of the
721 * receive buffer space" [RFC7609]
722 */
723static inline int smc_rmb_wnd_update_limit(int rmbe_size)
724{
725 return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
726}
727
c6ba7c9b
HW
728static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
729 bool is_rmb, int bufsize)
b33982c3
UB
730{
731 struct smc_buf_desc *buf_desc;
732 struct smc_link *lnk;
733 int rc;
734
735 /* try to alloc a new buffer */
736 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
737 if (!buf_desc)
738 return ERR_PTR(-ENOMEM);
739
2ef4f27a
SR
740 buf_desc->order = get_order(bufsize);
741 buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
742 __GFP_NOMEMALLOC | __GFP_COMP |
743 __GFP_NORETRY | __GFP_ZERO,
744 buf_desc->order);
745 if (!buf_desc->pages) {
b33982c3
UB
746 kfree(buf_desc);
747 return ERR_PTR(-EAGAIN);
748 }
2ef4f27a 749 buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
b33982c3
UB
750
751 /* build the sg table from the pages */
752 lnk = &lgr->lnk[SMC_SINGLE_LINK];
753 rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
754 GFP_KERNEL);
755 if (rc) {
6511aad3 756 smc_buf_free(lgr, is_rmb, buf_desc);
b33982c3
UB
757 return ERR_PTR(rc);
758 }
759 sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
760 buf_desc->cpu_addr, bufsize);
761
762 /* map sg table to DMA address */
763 rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
764 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
765 /* SMC protocol depends on mapping to one DMA address only */
766 if (rc != 1) {
6511aad3 767 smc_buf_free(lgr, is_rmb, buf_desc);
b33982c3
UB
768 return ERR_PTR(-EAGAIN);
769 }
770
771 /* create a new memory region for the RMB */
772 if (is_rmb) {
773 rc = smc_ib_get_memory_region(lnk->roce_pd,
774 IB_ACCESS_REMOTE_WRITE |
775 IB_ACCESS_LOCAL_WRITE,
776 buf_desc);
777 if (rc) {
6511aad3 778 smc_buf_free(lgr, is_rmb, buf_desc);
b33982c3
UB
779 return ERR_PTR(rc);
780 }
781 }
782
69cb7dc0 783 buf_desc->len = bufsize;
b33982c3
UB
784 return buf_desc;
785}
786
c6ba7c9b
HW
787#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
788
789static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
790 bool is_dmb, int bufsize)
791{
792 struct smc_buf_desc *buf_desc;
793 int rc;
794
795 if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
796 return ERR_PTR(-EAGAIN);
797
798 /* try to alloc a new DMB */
799 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
800 if (!buf_desc)
801 return ERR_PTR(-ENOMEM);
802 if (is_dmb) {
803 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
804 if (rc) {
805 kfree(buf_desc);
806 return ERR_PTR(-EAGAIN);
807 }
be244f28
HW
808 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
809 /* CDC header stored in buf. So, pretend it was smaller */
810 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
c6ba7c9b
HW
811 } else {
812 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
813 __GFP_NOWARN | __GFP_NORETRY |
814 __GFP_NOMEMALLOC);
815 if (!buf_desc->cpu_addr) {
816 kfree(buf_desc);
817 return ERR_PTR(-EAGAIN);
818 }
819 buf_desc->len = bufsize;
820 }
821 return buf_desc;
822}
823
824static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
cd6851f3 825{
8437bda0 826 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
cd6851f3
UB
827 struct smc_connection *conn = &smc->conn;
828 struct smc_link_group *lgr = conn->lgr;
3e034725 829 struct list_head *buf_list;
c45abf31 830 int bufsize, bufsize_short;
3e034725
UB
831 int sk_buf_size;
832 rwlock_t *lock;
cd6851f3 833
3e034725
UB
834 if (is_rmb)
835 /* use socket recv buffer size (w/o overhead) as start value */
836 sk_buf_size = smc->sk.sk_rcvbuf / 2;
837 else
838 /* use socket send buffer size (w/o overhead) as start value */
839 sk_buf_size = smc->sk.sk_sndbuf / 2;
840
4e1061f4 841 for (bufsize_short = smc_compress_bufsize(sk_buf_size);
c45abf31 842 bufsize_short >= 0; bufsize_short--) {
9d8fb617 843
3e034725
UB
844 if (is_rmb) {
845 lock = &lgr->rmbs_lock;
846 buf_list = &lgr->rmbs[bufsize_short];
847 } else {
848 lock = &lgr->sndbufs_lock;
849 buf_list = &lgr->sndbufs[bufsize_short];
9d8fb617 850 }
c45abf31 851 bufsize = smc_uncompress_bufsize(bufsize_short);
a3fe3d01
UB
852 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
853 continue;
854
3e034725 855 /* check for reusable slot in the link group */
8437bda0 856 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
3e034725
UB
857 if (buf_desc) {
858 memset(buf_desc->cpu_addr, 0, bufsize);
cd6851f3
UB
859 break; /* found reusable slot */
860 }
a3fe3d01 861
c6ba7c9b
HW
862 if (is_smcd)
863 buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
864 else
865 buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
866
b33982c3
UB
867 if (PTR_ERR(buf_desc) == -ENOMEM)
868 break;
869 if (IS_ERR(buf_desc))
a3fe3d01 870 continue;
897e1c24 871
3e034725
UB
872 buf_desc->used = 1;
873 write_lock_bh(lock);
874 list_add(&buf_desc->list, buf_list);
875 write_unlock_bh(lock);
876 break; /* found */
cd6851f3 877 }
3e034725 878
b33982c3 879 if (IS_ERR(buf_desc))
3e034725
UB
880 return -ENOMEM;
881
882 if (is_rmb) {
883 conn->rmb_desc = buf_desc;
c45abf31
UB
884 conn->rmbe_size_short = bufsize_short;
885 smc->sk.sk_rcvbuf = bufsize * 2;
5f08318f 886 atomic_set(&conn->bytes_to_rcv, 0);
be244f28
HW
887 conn->rmbe_update_limit =
888 smc_rmb_wnd_update_limit(buf_desc->len);
c6ba7c9b
HW
889 if (is_smcd)
890 smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
cd6851f3 891 } else {
3e034725 892 conn->sndbuf_desc = buf_desc;
3e034725
UB
893 smc->sk.sk_sndbuf = bufsize * 2;
894 atomic_set(&conn->sndbuf_space, bufsize);
cd6851f3 895 }
3e034725
UB
896 return 0;
897}
898
10428dd8
UB
899void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
900{
901 struct smc_link_group *lgr = conn->lgr;
902
c6ba7c9b
HW
903 if (!conn->lgr || conn->lgr->is_smcd)
904 return;
10428dd8
UB
905 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
906 conn->sndbuf_desc, DMA_TO_DEVICE);
907}
908
909void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
910{
911 struct smc_link_group *lgr = conn->lgr;
912
c6ba7c9b
HW
913 if (!conn->lgr || conn->lgr->is_smcd)
914 return;
10428dd8
UB
915 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
916 conn->sndbuf_desc, DMA_TO_DEVICE);
917}
918
919void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
920{
921 struct smc_link_group *lgr = conn->lgr;
922
c6ba7c9b
HW
923 if (!conn->lgr || conn->lgr->is_smcd)
924 return;
10428dd8
UB
925 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
926 conn->rmb_desc, DMA_FROM_DEVICE);
927}
928
929void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
930{
931 struct smc_link_group *lgr = conn->lgr;
932
c6ba7c9b
HW
933 if (!conn->lgr || conn->lgr->is_smcd)
934 return;
10428dd8
UB
935 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
936 conn->rmb_desc, DMA_FROM_DEVICE);
937}
938
3e034725
UB
939/* create the send and receive buffer for an SMC socket;
940 * receive buffers are called RMBs;
941 * (even though the SMC protocol allows more than one RMB-element per RMB,
942 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
943 * extra RMB for every connection in a link group
944 */
c6ba7c9b 945int smc_buf_create(struct smc_sock *smc, bool is_smcd)
3e034725
UB
946{
947 int rc;
948
949 /* create send buffer */
c6ba7c9b 950 rc = __smc_buf_create(smc, is_smcd, false);
3e034725
UB
951 if (rc)
952 return rc;
953 /* create rmb */
c6ba7c9b 954 rc = __smc_buf_create(smc, is_smcd, true);
3e034725 955 if (rc)
6511aad3 956 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
3e034725 957 return rc;
cd6851f3 958}
bd4ad577
UB
959
960static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
961{
962 int i;
963
964 for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
965 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
966 return i;
967 }
968 return -ENOSPC;
969}
970
4ed75de5
KG
971/* add a new rtoken from peer */
972int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
bd4ad577 973{
4ed75de5
KG
974 u64 dma_addr = be64_to_cpu(nw_vaddr);
975 u32 rkey = ntohl(nw_rkey);
bd4ad577
UB
976 int i;
977
978 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
979 if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
263eec9b 980 (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
bd4ad577 981 test_bit(i, lgr->rtokens_used_mask)) {
4ed75de5
KG
982 /* already in list */
983 return i;
984 }
985 }
986 i = smc_rmb_reserve_rtoken_idx(lgr);
987 if (i < 0)
988 return i;
989 lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
990 lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
991 return i;
992}
993
994/* delete an rtoken */
995int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
996{
997 u32 rkey = ntohl(nw_rkey);
998 int i;
999
1000 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1001 if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
1002 test_bit(i, lgr->rtokens_used_mask)) {
1003 lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
1004 lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
1005
1006 clear_bit(i, lgr->rtokens_used_mask);
bd4ad577
UB
1007 return 0;
1008 }
1009 }
4ed75de5
KG
1010 return -ENOENT;
1011}
1012
1013/* save rkey and dma_addr received from peer during clc handshake */
1014int smc_rmb_rtoken_handling(struct smc_connection *conn,
1015 struct smc_clc_msg_accept_confirm *clc)
1016{
1017 conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
1018 clc->rmb_rkey);
bd4ad577
UB
1019 if (conn->rtoken_idx < 0)
1020 return conn->rtoken_idx;
bd4ad577
UB
1021 return 0;
1022}
9fda3510
HW
1023
1024/* Called (from smc_exit) when module is removed */
1025void smc_core_exit(void)
1026{
1027 struct smc_link_group *lgr, *lg;
1028 LIST_HEAD(lgr_freeing_list);
1029
1030 spin_lock_bh(&smc_lgr_list.lock);
1031 if (!list_empty(&smc_lgr_list.list))
1032 list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
1033 spin_unlock_bh(&smc_lgr_list.lock);
1034 list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
1035 list_del_init(&lgr->list);
0d18a0cb
KG
1036 if (!lgr->is_smcd) {
1037 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
1038
1039 if (lnk->state == SMC_LNK_ACTIVE)
1040 smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
1041 false);
1042 smc_llc_link_inactive(lnk);
1043 }
9fda3510 1044 cancel_delayed_work_sync(&lgr->free_work);
0512f69e
HW
1045 if (lgr->is_smcd)
1046 smc_ism_signal_shutdown(lgr);
9fda3510
HW
1047 smc_lgr_free(lgr); /* free link group */
1048 }
1049}