ASoC: SOF: Drop superfluous snd_pcm_sgbuf_ops_page
[linux-2.6-block.git] / net / smc / smc_core.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
0cfdd8f9
UB
2/*
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
4 *
5 * Basic Transport Functions exploiting Infiniband API
6 *
7 * Copyright IBM Corp. 2016
8 *
9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
10 */
11
12#include <linux/socket.h>
13#include <linux/if_vlan.h>
14#include <linux/random.h>
15#include <linux/workqueue.h>
16#include <net/tcp.h>
17#include <net/sock.h>
18#include <rdma/ib_verbs.h>
ddb457c6 19#include <rdma/ib_cache.h>
0cfdd8f9
UB
20
21#include "smc.h"
22#include "smc_clc.h"
23#include "smc_core.h"
24#include "smc_ib.h"
f38ba179 25#include "smc_wr.h"
9bf9abea 26#include "smc_llc.h"
5f08318f 27#include "smc_cdc.h"
b38d7324 28#include "smc_close.h"
c6ba7c9b 29#include "smc_ism.h"
0cfdd8f9 30
5bc11ddb
UB
/* step by which the global link group number grows per created link group */
#define SMC_LGR_NUM_INCR		256
/* delays used when (re)arming a link group's free worker */
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)
0cfdd8f9 35
9fda3510
HW
36static struct smc_lgr_list smc_lgr_list = { /* established link groups */
37 .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
38 .list = LIST_HEAD_INIT(smc_lgr_list.list),
39 .num = 0,
40};
9bf9abea 41
6511aad3
HW
42static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
43 struct smc_buf_desc *buf_desc);
a6920d1d 44
97cdbc42
KG
45static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
46{
47 /* client link group creation always follows the server link group
48 * creation. For client use a somewhat higher removal delay time,
49 * otherwise there is a risk of out-of-sync link groups.
50 */
51 mod_delayed_work(system_wq, &lgr->free_work,
c6ba7c9b
HW
52 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
53 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
97cdbc42
KG
54}
55
0d18a0cb
KG
56void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
57{
58 mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
97cdbc42
KG
59}
60
0cfdd8f9
UB
61/* Register connection's alert token in our lookup structure.
62 * To use rbtrees we have to implement our own insert core.
63 * Requires @conns_lock
64 * @smc connection to register
65 * Returns 0 on success, != otherwise.
66 */
67static void smc_lgr_add_alert_token(struct smc_connection *conn)
68{
69 struct rb_node **link, *parent = NULL;
70 u32 token = conn->alert_token_local;
71
72 link = &conn->lgr->conns_all.rb_node;
73 while (*link) {
74 struct smc_connection *cur = rb_entry(*link,
75 struct smc_connection, alert_node);
76
77 parent = *link;
78 if (cur->alert_token_local > token)
79 link = &parent->rb_left;
80 else
81 link = &parent->rb_right;
82 }
83 /* Put the new node there */
84 rb_link_node(&conn->alert_node, parent, link);
85 rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
86}
87
88/* Register connection in link group by assigning an alert token
89 * registered in a search tree.
90 * Requires @conns_lock
91 * Note that '0' is a reserved value and not assigned.
92 */
93static void smc_lgr_register_conn(struct smc_connection *conn)
94{
95 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
96 static atomic_t nexttoken = ATOMIC_INIT(0);
97
98 /* find a new alert_token_local value not yet used by some connection
99 * in this link group
100 */
101 sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
102 while (!conn->alert_token_local) {
103 conn->alert_token_local = atomic_inc_return(&nexttoken);
104 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
105 conn->alert_token_local = 0;
106 }
107 smc_lgr_add_alert_token(conn);
108 conn->lgr->conns_num++;
109}
110
111/* Unregister connection and reset the alert token of the given connection<
112 */
113static void __smc_lgr_unregister_conn(struct smc_connection *conn)
114{
115 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
116 struct smc_link_group *lgr = conn->lgr;
117
118 rb_erase(&conn->alert_node, &lgr->conns_all);
119 lgr->conns_num--;
120 conn->alert_token_local = 0;
0cfdd8f9
UB
121 sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
122}
123
fb692ec4 124/* Unregister connection from lgr
0cfdd8f9
UB
125 */
126static void smc_lgr_unregister_conn(struct smc_connection *conn)
127{
128 struct smc_link_group *lgr = conn->lgr;
0cfdd8f9 129
77f838ac
KG
130 if (!lgr)
131 return;
0cfdd8f9
UB
132 write_lock_bh(&lgr->conns_lock);
133 if (conn->alert_token_local) {
0cfdd8f9
UB
134 __smc_lgr_unregister_conn(conn);
135 }
136 write_unlock_bh(&lgr->conns_lock);
0cfdd8f9
UB
137}
138
0d18a0cb
KG
139/* Send delete link, either as client to request the initiation
140 * of the DELETE LINK sequence from server; or as server to
141 * initiate the delete processing. See smc_llc_rx_delete_link().
142 */
143static int smc_link_send_delete(struct smc_link *lnk)
144{
145 if (lnk->state == SMC_LNK_ACTIVE &&
146 !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) {
147 smc_llc_link_deleting(lnk);
148 return 0;
149 }
150 return -ENOTCONN;
151}
152
3f3f0e36
UB
153static void smc_lgr_free(struct smc_link_group *lgr);
154
0cfdd8f9
UB
155static void smc_lgr_free_work(struct work_struct *work)
156{
157 struct smc_link_group *lgr = container_of(to_delayed_work(work),
158 struct smc_link_group,
159 free_work);
160 bool conns;
161
162 spin_lock_bh(&smc_lgr_list.lock);
163 read_lock_bh(&lgr->conns_lock);
164 conns = RB_EMPTY_ROOT(&lgr->conns_all);
165 read_unlock_bh(&lgr->conns_lock);
166 if (!conns) { /* number of lgr connections is no longer zero */
167 spin_unlock_bh(&smc_lgr_list.lock);
168 return;
169 }
e78b2622
KG
170 if (!list_empty(&lgr->list))
171 list_del_init(&lgr->list); /* remove from smc_lgr_list */
0cfdd8f9 172 spin_unlock_bh(&smc_lgr_list.lock);
0d18a0cb
KG
173
174 if (!lgr->is_smcd && !lgr->terminating) {
90d8b29c
UB
175 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
176
0d18a0cb 177 /* try to send del link msg, on error free lgr immediately */
90d8b29c
UB
178 if (lnk->state == SMC_LNK_ACTIVE &&
179 !smc_link_send_delete(lnk)) {
0d18a0cb
KG
180 /* reschedule in case we never receive a response */
181 smc_lgr_schedule_free_work(lgr);
182 return;
183 }
184 }
185
3cf52eb1 186 if (!delayed_work_pending(&lgr->free_work)) {
0d18a0cb
KG
187 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
188
189 if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
190 smc_llc_link_inactive(lnk);
0512f69e
HW
191 if (lgr->is_smcd)
192 smc_ism_signal_shutdown(lgr);
268ffcc4 193 smc_lgr_free(lgr);
3cf52eb1 194 }
0cfdd8f9
UB
195}
196
197/* create a new SMC link group */
bc36d2fc 198static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
0cfdd8f9
UB
199{
200 struct smc_link_group *lgr;
201 struct smc_link *lnk;
202 u8 rndvec[3];
203 int rc = 0;
cd6851f3 204 int i;
0cfdd8f9 205
bc36d2fc 206 if (ini->is_smcd && ini->vlan_id) {
7a62725a
KG
207 if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
208 rc = SMC_CLC_DECL_ISMVLANERR;
c6ba7c9b 209 goto out;
7a62725a 210 }
c6ba7c9b
HW
211 }
212
0cfdd8f9
UB
213 lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
214 if (!lgr) {
7a62725a 215 rc = SMC_CLC_DECL_MEM;
0cfdd8f9
UB
216 goto out;
217 }
bc36d2fc 218 lgr->is_smcd = ini->is_smcd;
517c300e 219 lgr->sync_err = 0;
bc36d2fc 220 lgr->vlan_id = ini->vlan_id;
cd6851f3
UB
221 rwlock_init(&lgr->sndbufs_lock);
222 rwlock_init(&lgr->rmbs_lock);
c6ba7c9b 223 rwlock_init(&lgr->conns_lock);
cd6851f3
UB
224 for (i = 0; i < SMC_RMBE_SIZES; i++) {
225 INIT_LIST_HEAD(&lgr->sndbufs[i]);
226 INIT_LIST_HEAD(&lgr->rmbs[i]);
227 }
9fda3510
HW
228 smc_lgr_list.num += SMC_LGR_NUM_INCR;
229 memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
0cfdd8f9
UB
230 INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
231 lgr->conns_all = RB_ROOT;
bc36d2fc 232 if (ini->is_smcd) {
c6ba7c9b 233 /* SMC-D specific settings */
bc36d2fc
KG
234 lgr->peer_gid = ini->ism_gid;
235 lgr->smcd = ini->ism_dev;
c6ba7c9b
HW
236 } else {
237 /* SMC-R specific settings */
238 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
bc36d2fc
KG
239 memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
240 SMC_SYSTEMID_LEN);
c6ba7c9b
HW
241
242 lnk = &lgr->lnk[SMC_SINGLE_LINK];
243 /* initialize link */
244 lnk->state = SMC_LNK_ACTIVATING;
245 lnk->link_id = SMC_SINGLE_LINK;
bc36d2fc
KG
246 lnk->smcibdev = ini->ib_dev;
247 lnk->ibport = ini->ib_port;
248 lnk->path_mtu =
249 ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
250 if (!ini->ib_dev->initialized)
251 smc_ib_setup_per_ibdev(ini->ib_dev);
c6ba7c9b
HW
252 get_random_bytes(rndvec, sizeof(rndvec));
253 lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
254 (rndvec[2] << 16);
7005ada6 255 rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
bc36d2fc
KG
256 ini->vlan_id, lnk->gid,
257 &lnk->sgid_index);
7005ada6
UB
258 if (rc)
259 goto free_lgr;
c6ba7c9b
HW
260 rc = smc_llc_link_init(lnk);
261 if (rc)
262 goto free_lgr;
263 rc = smc_wr_alloc_link_mem(lnk);
264 if (rc)
265 goto clear_llc_lnk;
266 rc = smc_ib_create_protection_domain(lnk);
267 if (rc)
268 goto free_link_mem;
269 rc = smc_ib_create_queue_pair(lnk);
270 if (rc)
271 goto dealloc_pd;
272 rc = smc_wr_create_link(lnk);
273 if (rc)
274 goto destroy_qp;
275 }
0cfdd8f9 276 smc->conn.lgr = lgr;
0cfdd8f9
UB
277 spin_lock_bh(&smc_lgr_list.lock);
278 list_add(&lgr->list, &smc_lgr_list.list);
279 spin_unlock_bh(&smc_lgr_list.lock);
f38ba179
UB
280 return 0;
281
bd4ad577
UB
282destroy_qp:
283 smc_ib_destroy_queue_pair(lnk);
284dealloc_pd:
285 smc_ib_dealloc_protection_domain(lnk);
286free_link_mem:
287 smc_wr_free_link_mem(lnk);
2a4c57a9
KG
288clear_llc_lnk:
289 smc_llc_link_clear(lnk);
f38ba179
UB
290free_lgr:
291 kfree(lgr);
0cfdd8f9 292out:
7a62725a
KG
293 if (rc < 0) {
294 if (rc == -ENOMEM)
295 rc = SMC_CLC_DECL_MEM;
296 else
297 rc = SMC_CLC_DECL_INTERR;
298 }
0cfdd8f9
UB
299 return rc;
300}
301
fb692ec4
KG
302static void smc_buf_unuse(struct smc_connection *conn,
303 struct smc_link_group *lgr)
cd6851f3 304{
69cb7dc0 305 if (conn->sndbuf_desc)
cd6851f3 306 conn->sndbuf_desc->used = 0;
cd6851f3 307 if (conn->rmb_desc) {
a6920d1d 308 if (!conn->rmb_desc->regerr) {
c7674c00
KG
309 if (!lgr->is_smcd) {
310 /* unregister rmb with peer */
311 smc_llc_do_delete_rkey(
312 &lgr->lnk[SMC_SINGLE_LINK],
313 conn->rmb_desc);
314 }
a5e04318 315 conn->rmb_desc->used = 0;
a6920d1d
KG
316 } else {
317 /* buf registration failed, reuse not possible */
a6920d1d
KG
318 write_lock_bh(&lgr->rmbs_lock);
319 list_del(&conn->rmb_desc->list);
320 write_unlock_bh(&lgr->rmbs_lock);
321
6511aad3 322 smc_buf_free(lgr, true, conn->rmb_desc);
a6920d1d 323 }
cd6851f3
UB
324 }
325}
326
0cfdd8f9
UB
327/* remove a finished connection from its link group */
328void smc_conn_free(struct smc_connection *conn)
329{
fb692ec4
KG
330 struct smc_link_group *lgr = conn->lgr;
331
332 if (!lgr)
0cfdd8f9 333 return;
fb692ec4 334 if (lgr->is_smcd) {
c6ba7c9b 335 smc_ism_unset_conn(conn);
be244f28
HW
336 tasklet_kill(&conn->rx_tsklet);
337 } else {
c6ba7c9b 338 smc_cdc_tx_dismiss_slots(conn);
be244f28 339 }
8fc002b0 340 smc_lgr_unregister_conn(conn);
fb692ec4 341 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
8fc002b0 342 conn->lgr = NULL;
fb692ec4
KG
343
344 if (!lgr->conns_num)
345 smc_lgr_schedule_free_work(lgr);
0cfdd8f9
UB
346}
347
348static void smc_link_clear(struct smc_link *lnk)
349{
350 lnk->peer_qpn = 0;
2a4c57a9 351 smc_llc_link_clear(lnk);
bd4ad577 352 smc_ib_modify_qp_reset(lnk);
f38ba179 353 smc_wr_free_link(lnk);
bd4ad577
UB
354 smc_ib_destroy_queue_pair(lnk);
355 smc_ib_dealloc_protection_domain(lnk);
f38ba179 356 smc_wr_free_link_mem(lnk);
0cfdd8f9
UB
357}
358
c6ba7c9b
HW
359static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
360 struct smc_buf_desc *buf_desc)
cd6851f3 361{
6511aad3
HW
362 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
363
3e034725
UB
364 if (is_rmb) {
365 if (buf_desc->mr_rx[SMC_SINGLE_LINK])
366 smc_ib_put_memory_region(
367 buf_desc->mr_rx[SMC_SINGLE_LINK]);
368 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
369 DMA_FROM_DEVICE);
370 } else {
371 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
372 DMA_TO_DEVICE);
cd6851f3 373 }
3e034725 374 sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
2ef4f27a
SR
375 if (buf_desc->pages)
376 __free_pages(buf_desc->pages, buf_desc->order);
3e034725 377 kfree(buf_desc);
cd6851f3
UB
378}
379
c6ba7c9b
HW
380static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
381 struct smc_buf_desc *buf_desc)
382{
be244f28
HW
383 if (is_dmb) {
384 /* restore original buf len */
385 buf_desc->len += sizeof(struct smcd_cdc_msg);
c6ba7c9b 386 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
be244f28 387 } else {
c6ba7c9b 388 kfree(buf_desc->cpu_addr);
be244f28 389 }
c6ba7c9b
HW
390 kfree(buf_desc);
391}
392
393static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
394 struct smc_buf_desc *buf_desc)
395{
396 if (lgr->is_smcd)
397 smcd_buf_free(lgr, is_rmb, buf_desc);
398 else
399 smcr_buf_free(lgr, is_rmb, buf_desc);
400}
401
3e034725 402static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
cd6851f3 403{
3e034725
UB
404 struct smc_buf_desc *buf_desc, *bf_desc;
405 struct list_head *buf_list;
cd6851f3
UB
406 int i;
407
408 for (i = 0; i < SMC_RMBE_SIZES; i++) {
3e034725
UB
409 if (is_rmb)
410 buf_list = &lgr->rmbs[i];
411 else
412 buf_list = &lgr->sndbufs[i];
413 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
cd6851f3 414 list) {
3e034725 415 list_del(&buf_desc->list);
6511aad3 416 smc_buf_free(lgr, is_rmb, buf_desc);
cd6851f3
UB
417 }
418 }
419}
420
3e034725
UB
421static void smc_lgr_free_bufs(struct smc_link_group *lgr)
422{
423 /* free send buffers */
424 __smc_lgr_free_bufs(lgr, false);
425 /* free rmbs */
426 __smc_lgr_free_bufs(lgr, true);
427}
428
0cfdd8f9 429/* remove a link group */
3f3f0e36 430static void smc_lgr_free(struct smc_link_group *lgr)
0cfdd8f9 431{
3e034725 432 smc_lgr_free_bufs(lgr);
c6ba7c9b
HW
433 if (lgr->is_smcd)
434 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
435 else
436 smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
0cfdd8f9
UB
437 kfree(lgr);
438}
439
9651b934
KG
440void smc_lgr_forget(struct smc_link_group *lgr)
441{
442 spin_lock_bh(&smc_lgr_list.lock);
443 /* do not use this link group for new connections */
444 if (!list_empty(&lgr->list))
445 list_del_init(&lgr->list);
446 spin_unlock_bh(&smc_lgr_list.lock);
447}
448
0cfdd8f9 449/* terminate linkgroup abnormally */
b9f227c3 450static void __smc_lgr_terminate(struct smc_link_group *lgr)
0cfdd8f9
UB
451{
452 struct smc_connection *conn;
b38d7324 453 struct smc_sock *smc;
0cfdd8f9
UB
454 struct rb_node *node;
455
517c300e
KG
456 if (lgr->terminating)
457 return; /* lgr already terminating */
458 lgr->terminating = 1;
b9f227c3
HW
459 if (!list_empty(&lgr->list)) /* forget lgr */
460 list_del_init(&lgr->list);
c6ba7c9b
HW
461 if (!lgr->is_smcd)
462 smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
0cfdd8f9
UB
463
464 write_lock_bh(&lgr->conns_lock);
465 node = rb_first(&lgr->conns_all);
466 while (node) {
467 conn = rb_entry(node, struct smc_connection, alert_node);
b38d7324 468 smc = container_of(conn, struct smc_sock, conn);
51f1de79 469 sock_hold(&smc->sk); /* sock_put in close work */
732720fa 470 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
0cfdd8f9 471 __smc_lgr_unregister_conn(conn);
8fc002b0 472 conn->lgr = NULL;
732720fa 473 write_unlock_bh(&lgr->conns_lock);
51f1de79
UB
474 if (!schedule_work(&conn->close_work))
475 sock_put(&smc->sk);
732720fa 476 write_lock_bh(&lgr->conns_lock);
0cfdd8f9
UB
477 node = rb_first(&lgr->conns_all);
478 }
479 write_unlock_bh(&lgr->conns_lock);
c6ba7c9b
HW
480 if (!lgr->is_smcd)
481 wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
97cdbc42 482 smc_lgr_schedule_free_work(lgr);
0cfdd8f9
UB
483}
484
b9f227c3
HW
485void smc_lgr_terminate(struct smc_link_group *lgr)
486{
487 spin_lock_bh(&smc_lgr_list.lock);
488 __smc_lgr_terminate(lgr);
489 spin_unlock_bh(&smc_lgr_list.lock);
490}
491
9fda3510
HW
492/* Called when IB port is terminated */
493void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
494{
495 struct smc_link_group *lgr, *l;
496
b9f227c3 497 spin_lock_bh(&smc_lgr_list.lock);
9fda3510 498 list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
c6ba7c9b
HW
499 if (!lgr->is_smcd &&
500 lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
9fda3510 501 lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
b9f227c3 502 __smc_lgr_terminate(lgr);
9fda3510 503 }
b9f227c3 504 spin_unlock_bh(&smc_lgr_list.lock);
9fda3510
HW
505}
506
c6ba7c9b 507/* Called when SMC-D device is terminated or peer is lost */
0512f69e 508void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
c6ba7c9b
HW
509{
510 struct smc_link_group *lgr, *l;
511 LIST_HEAD(lgr_free_list);
512
513 /* run common cleanup function and build free list */
514 spin_lock_bh(&smc_lgr_list.lock);
515 list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
516 if (lgr->is_smcd && lgr->smcd == dev &&
517 (!peer_gid || lgr->peer_gid == peer_gid) &&
0512f69e 518 (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
c6ba7c9b
HW
519 __smc_lgr_terminate(lgr);
520 list_move(&lgr->list, &lgr_free_list);
521 }
522 }
523 spin_unlock_bh(&smc_lgr_list.lock);
524
525 /* cancel the regular free workers and actually free lgrs */
526 list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
527 list_del_init(&lgr->list);
528 cancel_delayed_work_sync(&lgr->free_work);
0512f69e
HW
529 if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
530 smc_ism_signal_shutdown(lgr);
c6ba7c9b
HW
531 smc_lgr_free(lgr);
532 }
533}
534
0cfdd8f9
UB
535/* Determine vlan of internal TCP socket.
536 * @vlan_id: address to store the determined vlan id into
537 */
bc36d2fc 538int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
0cfdd8f9
UB
539{
540 struct dst_entry *dst = sk_dst_get(clcsock->sk);
cb9d43f6
UB
541 struct net_device *ndev;
542 int i, nest_lvl, rc = 0;
0cfdd8f9 543
bc36d2fc 544 ini->vlan_id = 0;
0cfdd8f9
UB
545 if (!dst) {
546 rc = -ENOTCONN;
547 goto out;
548 }
549 if (!dst->dev) {
550 rc = -ENODEV;
551 goto out_rel;
552 }
553
cb9d43f6
UB
554 ndev = dst->dev;
555 if (is_vlan_dev(ndev)) {
bc36d2fc 556 ini->vlan_id = vlan_dev_vlan_id(ndev);
cb9d43f6
UB
557 goto out_rel;
558 }
559
560 rtnl_lock();
561 nest_lvl = dev_get_nest_level(ndev);
562 for (i = 0; i < nest_lvl; i++) {
563 struct list_head *lower = &ndev->adj_list.lower;
564
565 if (list_empty(lower))
566 break;
567 lower = lower->next;
568 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
569 if (is_vlan_dev(ndev)) {
bc36d2fc 570 ini->vlan_id = vlan_dev_vlan_id(ndev);
cb9d43f6
UB
571 break;
572 }
573 }
574 rtnl_unlock();
0cfdd8f9
UB
575
576out_rel:
577 dst_release(dst);
578out:
579 return rc;
580}
581
c6ba7c9b
HW
582static bool smcr_lgr_match(struct smc_link_group *lgr,
583 struct smc_clc_msg_local *lcl,
ee05ff7a 584 enum smc_lgr_role role, u32 clcqpn)
0cfdd8f9 585{
c6ba7c9b
HW
586 return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
587 SMC_SYSTEMID_LEN) &&
588 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
589 SMC_GID_SIZE) &&
590 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
591 sizeof(lcl->mac)) &&
ee05ff7a
KG
592 lgr->role == role &&
593 (lgr->role == SMC_SERV ||
594 lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
c6ba7c9b 595}
0cfdd8f9 596
c6ba7c9b
HW
597static bool smcd_lgr_match(struct smc_link_group *lgr,
598 struct smcd_dev *smcismdev, u64 peer_gid)
599{
600 return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
0cfdd8f9
UB
601}
602
603/* create a new SMC connection (and a new link group if necessary) */
bc36d2fc 604int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
0cfdd8f9
UB
605{
606 struct smc_connection *conn = &smc->conn;
607 struct smc_link_group *lgr;
0cfdd8f9 608 enum smc_lgr_role role;
0cfdd8f9
UB
609 int rc = 0;
610
7a62725a 611 ini->cln_first_contact = SMC_FIRST_CONTACT;
0cfdd8f9 612 role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
bc36d2fc 613 if (role == SMC_CLNT && ini->srv_first_contact)
0cfdd8f9
UB
614 /* create new link group as well */
615 goto create;
616
617 /* determine if an existing link group can be reused */
618 spin_lock_bh(&smc_lgr_list.lock);
619 list_for_each_entry(lgr, &smc_lgr_list.list, list) {
620 write_lock_bh(&lgr->conns_lock);
bc36d2fc
KG
621 if ((ini->is_smcd ?
622 smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
623 smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
0cfdd8f9 624 !lgr->sync_err &&
bc36d2fc 625 lgr->vlan_id == ini->vlan_id &&
c6ba7c9b
HW
626 (role == SMC_CLNT ||
627 lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
0cfdd8f9 628 /* link group found */
7a62725a 629 ini->cln_first_contact = SMC_REUSE_CONTACT;
0cfdd8f9
UB
630 conn->lgr = lgr;
631 smc_lgr_register_conn(conn); /* add smc conn to lgr */
77f838ac
KG
632 if (delayed_work_pending(&lgr->free_work))
633 cancel_delayed_work(&lgr->free_work);
0cfdd8f9
UB
634 write_unlock_bh(&lgr->conns_lock);
635 break;
636 }
637 write_unlock_bh(&lgr->conns_lock);
638 }
639 spin_unlock_bh(&smc_lgr_list.lock);
640
bc36d2fc 641 if (role == SMC_CLNT && !ini->srv_first_contact &&
7a62725a 642 ini->cln_first_contact == SMC_FIRST_CONTACT) {
0cfdd8f9
UB
643 /* Server reuses a link group, but Client wants to start
644 * a new one
645 * send out_of_sync decline, reason synchr. error
646 */
7a62725a 647 return SMC_CLC_DECL_SYNCERR;
0cfdd8f9
UB
648 }
649
650create:
7a62725a 651 if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
bc36d2fc 652 rc = smc_lgr_create(smc, ini);
0cfdd8f9
UB
653 if (rc)
654 goto out;
44808792
HZ
655 lgr = conn->lgr;
656 write_lock_bh(&lgr->conns_lock);
0cfdd8f9 657 smc_lgr_register_conn(conn); /* add smc conn to lgr */
44808792 658 write_unlock_bh(&lgr->conns_lock);
0cfdd8f9 659 }
5f08318f 660 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
cbba07a7 661 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
de8474eb 662 conn->urg_state = SMC_URG_READ;
bc36d2fc 663 if (ini->is_smcd) {
be244f28
HW
664 conn->rx_off = sizeof(struct smcd_cdc_msg);
665 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
666 }
5f08318f
UB
667#ifndef KERNEL_HAS_ATOMIC64
668 spin_lock_init(&conn->acurs_lock);
669#endif
0cfdd8f9
UB
670
671out:
7a62725a 672 return rc;
0cfdd8f9 673}
cd6851f3 674
2f6becaf
HW
675/* convert the RMB size into the compressed notation - minimum 16K.
676 * In contrast to plain ilog2, this rounds towards the next power of 2,
677 * so the socket application gets at least its desired sndbuf / rcvbuf size.
678 */
679static u8 smc_compress_bufsize(int size)
680{
681 u8 compressed;
682
683 if (size <= SMC_BUF_MIN_SIZE)
684 return 0;
685
686 size = (size - 1) >> 14;
687 compressed = ilog2(size) + 1;
688 if (compressed >= SMC_RMBE_SIZES)
689 compressed = SMC_RMBE_SIZES - 1;
690 return compressed;
691}
692
693/* convert the RMB size from compressed notation into integer */
694int smc_uncompress_bufsize(u8 compressed)
695{
696 u32 size;
697
698 size = 0x00000001 << (((int)compressed) + 14);
699 return (int)size;
700}
701
3e034725
UB
702/* try to reuse a sndbuf or rmb description slot for a certain
703 * buffer size; if not available, return NULL
cd6851f3 704 */
8437bda0
HW
705static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
706 rwlock_t *lock,
707 struct list_head *buf_list)
cd6851f3 708{
3e034725 709 struct smc_buf_desc *buf_slot;
cd6851f3 710
3e034725
UB
711 read_lock_bh(lock);
712 list_for_each_entry(buf_slot, buf_list, list) {
713 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
714 read_unlock_bh(lock);
715 return buf_slot;
cd6851f3
UB
716 }
717 }
3e034725 718 read_unlock_bh(lock);
cd6851f3
UB
719 return NULL;
720}
721
952310cc
UB
722/* one of the conditions for announcing a receiver's current window size is
723 * that it "results in a minimum increase in the window size of 10% of the
724 * receive buffer space" [RFC7609]
725 */
726static inline int smc_rmb_wnd_update_limit(int rmbe_size)
727{
728 return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
729}
730
c6ba7c9b
HW
731static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
732 bool is_rmb, int bufsize)
b33982c3
UB
733{
734 struct smc_buf_desc *buf_desc;
735 struct smc_link *lnk;
736 int rc;
737
738 /* try to alloc a new buffer */
739 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
740 if (!buf_desc)
741 return ERR_PTR(-ENOMEM);
742
2ef4f27a
SR
743 buf_desc->order = get_order(bufsize);
744 buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
745 __GFP_NOMEMALLOC | __GFP_COMP |
746 __GFP_NORETRY | __GFP_ZERO,
747 buf_desc->order);
748 if (!buf_desc->pages) {
b33982c3
UB
749 kfree(buf_desc);
750 return ERR_PTR(-EAGAIN);
751 }
2ef4f27a 752 buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
b33982c3
UB
753
754 /* build the sg table from the pages */
755 lnk = &lgr->lnk[SMC_SINGLE_LINK];
756 rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
757 GFP_KERNEL);
758 if (rc) {
6511aad3 759 smc_buf_free(lgr, is_rmb, buf_desc);
b33982c3
UB
760 return ERR_PTR(rc);
761 }
762 sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
763 buf_desc->cpu_addr, bufsize);
764
765 /* map sg table to DMA address */
766 rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
767 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
768 /* SMC protocol depends on mapping to one DMA address only */
769 if (rc != 1) {
6511aad3 770 smc_buf_free(lgr, is_rmb, buf_desc);
b33982c3
UB
771 return ERR_PTR(-EAGAIN);
772 }
773
774 /* create a new memory region for the RMB */
775 if (is_rmb) {
776 rc = smc_ib_get_memory_region(lnk->roce_pd,
777 IB_ACCESS_REMOTE_WRITE |
778 IB_ACCESS_LOCAL_WRITE,
779 buf_desc);
780 if (rc) {
6511aad3 781 smc_buf_free(lgr, is_rmb, buf_desc);
b33982c3
UB
782 return ERR_PTR(rc);
783 }
784 }
785
69cb7dc0 786 buf_desc->len = bufsize;
b33982c3
UB
787 return buf_desc;
788}
789
c6ba7c9b
HW
790#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
791
792static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
793 bool is_dmb, int bufsize)
794{
795 struct smc_buf_desc *buf_desc;
796 int rc;
797
798 if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
799 return ERR_PTR(-EAGAIN);
800
801 /* try to alloc a new DMB */
802 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
803 if (!buf_desc)
804 return ERR_PTR(-ENOMEM);
805 if (is_dmb) {
806 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
807 if (rc) {
808 kfree(buf_desc);
809 return ERR_PTR(-EAGAIN);
810 }
be244f28
HW
811 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
812 /* CDC header stored in buf. So, pretend it was smaller */
813 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
c6ba7c9b
HW
814 } else {
815 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
816 __GFP_NOWARN | __GFP_NORETRY |
817 __GFP_NOMEMALLOC);
818 if (!buf_desc->cpu_addr) {
819 kfree(buf_desc);
820 return ERR_PTR(-EAGAIN);
821 }
822 buf_desc->len = bufsize;
823 }
824 return buf_desc;
825}
826
827static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
cd6851f3 828{
8437bda0 829 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
cd6851f3
UB
830 struct smc_connection *conn = &smc->conn;
831 struct smc_link_group *lgr = conn->lgr;
3e034725 832 struct list_head *buf_list;
c45abf31 833 int bufsize, bufsize_short;
3e034725
UB
834 int sk_buf_size;
835 rwlock_t *lock;
cd6851f3 836
3e034725
UB
837 if (is_rmb)
838 /* use socket recv buffer size (w/o overhead) as start value */
839 sk_buf_size = smc->sk.sk_rcvbuf / 2;
840 else
841 /* use socket send buffer size (w/o overhead) as start value */
842 sk_buf_size = smc->sk.sk_sndbuf / 2;
843
4e1061f4 844 for (bufsize_short = smc_compress_bufsize(sk_buf_size);
c45abf31 845 bufsize_short >= 0; bufsize_short--) {
9d8fb617 846
3e034725
UB
847 if (is_rmb) {
848 lock = &lgr->rmbs_lock;
849 buf_list = &lgr->rmbs[bufsize_short];
850 } else {
851 lock = &lgr->sndbufs_lock;
852 buf_list = &lgr->sndbufs[bufsize_short];
9d8fb617 853 }
c45abf31 854 bufsize = smc_uncompress_bufsize(bufsize_short);
a3fe3d01
UB
855 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
856 continue;
857
3e034725 858 /* check for reusable slot in the link group */
8437bda0 859 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
3e034725
UB
860 if (buf_desc) {
861 memset(buf_desc->cpu_addr, 0, bufsize);
cd6851f3
UB
862 break; /* found reusable slot */
863 }
a3fe3d01 864
c6ba7c9b
HW
865 if (is_smcd)
866 buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
867 else
868 buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
869
b33982c3
UB
870 if (PTR_ERR(buf_desc) == -ENOMEM)
871 break;
872 if (IS_ERR(buf_desc))
a3fe3d01 873 continue;
897e1c24 874
3e034725
UB
875 buf_desc->used = 1;
876 write_lock_bh(lock);
877 list_add(&buf_desc->list, buf_list);
878 write_unlock_bh(lock);
879 break; /* found */
cd6851f3 880 }
3e034725 881
b33982c3 882 if (IS_ERR(buf_desc))
3e034725
UB
883 return -ENOMEM;
884
885 if (is_rmb) {
886 conn->rmb_desc = buf_desc;
c45abf31
UB
887 conn->rmbe_size_short = bufsize_short;
888 smc->sk.sk_rcvbuf = bufsize * 2;
5f08318f 889 atomic_set(&conn->bytes_to_rcv, 0);
be244f28
HW
890 conn->rmbe_update_limit =
891 smc_rmb_wnd_update_limit(buf_desc->len);
c6ba7c9b
HW
892 if (is_smcd)
893 smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
cd6851f3 894 } else {
3e034725 895 conn->sndbuf_desc = buf_desc;
3e034725
UB
896 smc->sk.sk_sndbuf = bufsize * 2;
897 atomic_set(&conn->sndbuf_space, bufsize);
cd6851f3 898 }
3e034725
UB
899 return 0;
900}
901
10428dd8
UB
902void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
903{
904 struct smc_link_group *lgr = conn->lgr;
905
c6ba7c9b
HW
906 if (!conn->lgr || conn->lgr->is_smcd)
907 return;
10428dd8
UB
908 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
909 conn->sndbuf_desc, DMA_TO_DEVICE);
910}
911
912void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
913{
914 struct smc_link_group *lgr = conn->lgr;
915
c6ba7c9b
HW
916 if (!conn->lgr || conn->lgr->is_smcd)
917 return;
10428dd8
UB
918 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
919 conn->sndbuf_desc, DMA_TO_DEVICE);
920}
921
922void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
923{
924 struct smc_link_group *lgr = conn->lgr;
925
c6ba7c9b
HW
926 if (!conn->lgr || conn->lgr->is_smcd)
927 return;
10428dd8
UB
928 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
929 conn->rmb_desc, DMA_FROM_DEVICE);
930}
931
932void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
933{
934 struct smc_link_group *lgr = conn->lgr;
935
c6ba7c9b
HW
936 if (!conn->lgr || conn->lgr->is_smcd)
937 return;
10428dd8
UB
938 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
939 conn->rmb_desc, DMA_FROM_DEVICE);
940}
941
3e034725
UB
942/* create the send and receive buffer for an SMC socket;
943 * receive buffers are called RMBs;
944 * (even though the SMC protocol allows more than one RMB-element per RMB,
945 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
946 * extra RMB for every connection in a link group
947 */
c6ba7c9b 948int smc_buf_create(struct smc_sock *smc, bool is_smcd)
3e034725
UB
949{
950 int rc;
951
952 /* create send buffer */
c6ba7c9b 953 rc = __smc_buf_create(smc, is_smcd, false);
3e034725
UB
954 if (rc)
955 return rc;
956 /* create rmb */
c6ba7c9b 957 rc = __smc_buf_create(smc, is_smcd, true);
3e034725 958 if (rc)
6511aad3 959 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
3e034725 960 return rc;
cd6851f3 961}
bd4ad577
UB
962
963static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
964{
965 int i;
966
967 for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
968 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
969 return i;
970 }
971 return -ENOSPC;
972}
973
4ed75de5
KG
974/* add a new rtoken from peer */
975int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
bd4ad577 976{
4ed75de5
KG
977 u64 dma_addr = be64_to_cpu(nw_vaddr);
978 u32 rkey = ntohl(nw_rkey);
bd4ad577
UB
979 int i;
980
981 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
982 if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
263eec9b 983 (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
bd4ad577 984 test_bit(i, lgr->rtokens_used_mask)) {
4ed75de5
KG
985 /* already in list */
986 return i;
987 }
988 }
989 i = smc_rmb_reserve_rtoken_idx(lgr);
990 if (i < 0)
991 return i;
992 lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
993 lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
994 return i;
995}
996
997/* delete an rtoken */
998int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
999{
1000 u32 rkey = ntohl(nw_rkey);
1001 int i;
1002
1003 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1004 if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
1005 test_bit(i, lgr->rtokens_used_mask)) {
1006 lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
1007 lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
1008
1009 clear_bit(i, lgr->rtokens_used_mask);
bd4ad577
UB
1010 return 0;
1011 }
1012 }
4ed75de5
KG
1013 return -ENOENT;
1014}
1015
1016/* save rkey and dma_addr received from peer during clc handshake */
1017int smc_rmb_rtoken_handling(struct smc_connection *conn,
1018 struct smc_clc_msg_accept_confirm *clc)
1019{
1020 conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
1021 clc->rmb_rkey);
bd4ad577
UB
1022 if (conn->rtoken_idx < 0)
1023 return conn->rtoken_idx;
bd4ad577
UB
1024 return 0;
1025}
9fda3510
HW
1026
1027/* Called (from smc_exit) when module is removed */
1028void smc_core_exit(void)
1029{
1030 struct smc_link_group *lgr, *lg;
1031 LIST_HEAD(lgr_freeing_list);
1032
1033 spin_lock_bh(&smc_lgr_list.lock);
1034 if (!list_empty(&smc_lgr_list.list))
1035 list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
1036 spin_unlock_bh(&smc_lgr_list.lock);
1037 list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
1038 list_del_init(&lgr->list);
0d18a0cb
KG
1039 if (!lgr->is_smcd) {
1040 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
1041
1042 if (lnk->state == SMC_LNK_ACTIVE)
1043 smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
1044 false);
1045 smc_llc_link_inactive(lnk);
1046 }
9fda3510 1047 cancel_delayed_work_sync(&lgr->free_work);
0512f69e
HW
1048 if (lgr->is_smcd)
1049 smc_ism_signal_shutdown(lgr);
9fda3510
HW
1050 smc_lgr_free(lgr); /* free link group */
1051 }
1052}