Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
0cfdd8f9 UB |
2 | /* |
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | |
4 | * | |
5 | * Basic Transport Functions exploiting Infiniband API | |
6 | * | |
7 | * Copyright IBM Corp. 2016 | |
8 | * | |
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | |
10 | */ | |
11 | ||
12 | #include <linux/socket.h> | |
13 | #include <linux/if_vlan.h> | |
14 | #include <linux/random.h> | |
15 | #include <linux/workqueue.h> | |
16 | #include <net/tcp.h> | |
17 | #include <net/sock.h> | |
18 | #include <rdma/ib_verbs.h> | |
19 | ||
20 | #include "smc.h" | |
21 | #include "smc_clc.h" | |
22 | #include "smc_core.h" | |
23 | #include "smc_ib.h" | |
f38ba179 | 24 | #include "smc_wr.h" |
9bf9abea | 25 | #include "smc_llc.h" |
5f08318f | 26 | #include "smc_cdc.h" |
b38d7324 | 27 | #include "smc_close.h" |
0cfdd8f9 | 28 | |
5bc11ddb UB |
29 | #define SMC_LGR_NUM_INCR 256 |
30 | #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) | |
7f58a1ad | 31 | #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) |
0cfdd8f9 | 32 | |
9fda3510 HW |
33 | static struct smc_lgr_list smc_lgr_list = { /* established link groups */ |
34 | .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), | |
35 | .list = LIST_HEAD_INIT(smc_lgr_list.list), | |
36 | .num = 0, | |
37 | }; | |
9bf9abea | 38 | |
6511aad3 HW |
39 | static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, |
40 | struct smc_buf_desc *buf_desc); | |
a6920d1d | 41 | |
97cdbc42 KG |
42 | static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) |
43 | { | |
44 | /* client link group creation always follows the server link group | |
45 | * creation. For client use a somewhat higher removal delay time, | |
46 | * otherwise there is a risk of out-of-sync link groups. | |
47 | */ | |
48 | mod_delayed_work(system_wq, &lgr->free_work, | |
49 | lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : | |
50 | SMC_LGR_FREE_DELAY_SERV); | |
51 | } | |
52 | ||
0cfdd8f9 UB |
53 | /* Register connection's alert token in our lookup structure. |
54 | * To use rbtrees we have to implement our own insert core. | |
55 | * Requires @conns_lock | |
56 | * @smc connection to register | |
57 | * Returns 0 on success, != otherwise. | |
58 | */ | |
59 | static void smc_lgr_add_alert_token(struct smc_connection *conn) | |
60 | { | |
61 | struct rb_node **link, *parent = NULL; | |
62 | u32 token = conn->alert_token_local; | |
63 | ||
64 | link = &conn->lgr->conns_all.rb_node; | |
65 | while (*link) { | |
66 | struct smc_connection *cur = rb_entry(*link, | |
67 | struct smc_connection, alert_node); | |
68 | ||
69 | parent = *link; | |
70 | if (cur->alert_token_local > token) | |
71 | link = &parent->rb_left; | |
72 | else | |
73 | link = &parent->rb_right; | |
74 | } | |
75 | /* Put the new node there */ | |
76 | rb_link_node(&conn->alert_node, parent, link); | |
77 | rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); | |
78 | } | |
79 | ||
80 | /* Register connection in link group by assigning an alert token | |
81 | * registered in a search tree. | |
82 | * Requires @conns_lock | |
83 | * Note that '0' is a reserved value and not assigned. | |
84 | */ | |
85 | static void smc_lgr_register_conn(struct smc_connection *conn) | |
86 | { | |
87 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | |
88 | static atomic_t nexttoken = ATOMIC_INIT(0); | |
89 | ||
90 | /* find a new alert_token_local value not yet used by some connection | |
91 | * in this link group | |
92 | */ | |
93 | sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */ | |
94 | while (!conn->alert_token_local) { | |
95 | conn->alert_token_local = atomic_inc_return(&nexttoken); | |
96 | if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr)) | |
97 | conn->alert_token_local = 0; | |
98 | } | |
99 | smc_lgr_add_alert_token(conn); | |
100 | conn->lgr->conns_num++; | |
101 | } | |
102 | ||
103 | /* Unregister connection and reset the alert token of the given connection< | |
104 | */ | |
105 | static void __smc_lgr_unregister_conn(struct smc_connection *conn) | |
106 | { | |
107 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | |
108 | struct smc_link_group *lgr = conn->lgr; | |
109 | ||
110 | rb_erase(&conn->alert_node, &lgr->conns_all); | |
111 | lgr->conns_num--; | |
112 | conn->alert_token_local = 0; | |
113 | conn->lgr = NULL; | |
114 | sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ | |
115 | } | |
116 | ||
117 | /* Unregister connection and trigger lgr freeing if applicable | |
118 | */ | |
119 | static void smc_lgr_unregister_conn(struct smc_connection *conn) | |
120 | { | |
121 | struct smc_link_group *lgr = conn->lgr; | |
122 | int reduced = 0; | |
123 | ||
124 | write_lock_bh(&lgr->conns_lock); | |
125 | if (conn->alert_token_local) { | |
126 | reduced = 1; | |
127 | __smc_lgr_unregister_conn(conn); | |
128 | } | |
129 | write_unlock_bh(&lgr->conns_lock); | |
5bc11ddb UB |
130 | if (!reduced || lgr->conns_num) |
131 | return; | |
97cdbc42 | 132 | smc_lgr_schedule_free_work(lgr); |
0cfdd8f9 UB |
133 | } |
134 | ||
135 | static void smc_lgr_free_work(struct work_struct *work) | |
136 | { | |
137 | struct smc_link_group *lgr = container_of(to_delayed_work(work), | |
138 | struct smc_link_group, | |
139 | free_work); | |
140 | bool conns; | |
141 | ||
142 | spin_lock_bh(&smc_lgr_list.lock); | |
610db66f UB |
143 | if (list_empty(&lgr->list)) |
144 | goto free; | |
0cfdd8f9 UB |
145 | read_lock_bh(&lgr->conns_lock); |
146 | conns = RB_EMPTY_ROOT(&lgr->conns_all); | |
147 | read_unlock_bh(&lgr->conns_lock); | |
148 | if (!conns) { /* number of lgr connections is no longer zero */ | |
149 | spin_unlock_bh(&smc_lgr_list.lock); | |
150 | return; | |
151 | } | |
152 | list_del_init(&lgr->list); /* remove from smc_lgr_list */ | |
610db66f | 153 | free: |
0cfdd8f9 | 154 | spin_unlock_bh(&smc_lgr_list.lock); |
3cf52eb1 KG |
155 | if (!delayed_work_pending(&lgr->free_work)) { |
156 | if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE) | |
157 | smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); | |
268ffcc4 | 158 | smc_lgr_free(lgr); |
3cf52eb1 | 159 | } |
0cfdd8f9 UB |
160 | } |
161 | ||
162 | /* create a new SMC link group */ | |
be6d467b | 163 | static int smc_lgr_create(struct smc_sock *smc, |
0cfdd8f9 UB |
164 | struct smc_ib_device *smcibdev, u8 ibport, |
165 | char *peer_systemid, unsigned short vlan_id) | |
166 | { | |
167 | struct smc_link_group *lgr; | |
168 | struct smc_link *lnk; | |
169 | u8 rndvec[3]; | |
170 | int rc = 0; | |
cd6851f3 | 171 | int i; |
0cfdd8f9 UB |
172 | |
173 | lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); | |
174 | if (!lgr) { | |
175 | rc = -ENOMEM; | |
176 | goto out; | |
177 | } | |
178 | lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; | |
517c300e | 179 | lgr->sync_err = 0; |
0cfdd8f9 UB |
180 | memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); |
181 | lgr->vlan_id = vlan_id; | |
cd6851f3 UB |
182 | rwlock_init(&lgr->sndbufs_lock); |
183 | rwlock_init(&lgr->rmbs_lock); | |
184 | for (i = 0; i < SMC_RMBE_SIZES; i++) { | |
185 | INIT_LIST_HEAD(&lgr->sndbufs[i]); | |
186 | INIT_LIST_HEAD(&lgr->rmbs[i]); | |
187 | } | |
9fda3510 HW |
188 | smc_lgr_list.num += SMC_LGR_NUM_INCR; |
189 | memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); | |
0cfdd8f9 UB |
190 | INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); |
191 | lgr->conns_all = RB_ROOT; | |
192 | ||
193 | lnk = &lgr->lnk[SMC_SINGLE_LINK]; | |
194 | /* initialize link */ | |
52bedf37 | 195 | lnk->state = SMC_LNK_ACTIVATING; |
2be922f3 | 196 | lnk->link_id = SMC_SINGLE_LINK; |
0cfdd8f9 UB |
197 | lnk->smcibdev = smcibdev; |
198 | lnk->ibport = ibport; | |
199 | lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; | |
bd4ad577 UB |
200 | if (!smcibdev->initialized) |
201 | smc_ib_setup_per_ibdev(smcibdev); | |
0cfdd8f9 UB |
202 | get_random_bytes(rndvec, sizeof(rndvec)); |
203 | lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); | |
2a4c57a9 | 204 | rc = smc_llc_link_init(lnk); |
f38ba179 UB |
205 | if (rc) |
206 | goto free_lgr; | |
2a4c57a9 KG |
207 | rc = smc_wr_alloc_link_mem(lnk); |
208 | if (rc) | |
209 | goto clear_llc_lnk; | |
bd4ad577 UB |
210 | rc = smc_ib_create_protection_domain(lnk); |
211 | if (rc) | |
212 | goto free_link_mem; | |
213 | rc = smc_ib_create_queue_pair(lnk); | |
214 | if (rc) | |
215 | goto dealloc_pd; | |
216 | rc = smc_wr_create_link(lnk); | |
217 | if (rc) | |
218 | goto destroy_qp; | |
0cfdd8f9 UB |
219 | |
220 | smc->conn.lgr = lgr; | |
221 | rwlock_init(&lgr->conns_lock); | |
222 | spin_lock_bh(&smc_lgr_list.lock); | |
223 | list_add(&lgr->list, &smc_lgr_list.list); | |
224 | spin_unlock_bh(&smc_lgr_list.lock); | |
f38ba179 UB |
225 | return 0; |
226 | ||
bd4ad577 UB |
227 | destroy_qp: |
228 | smc_ib_destroy_queue_pair(lnk); | |
229 | dealloc_pd: | |
230 | smc_ib_dealloc_protection_domain(lnk); | |
231 | free_link_mem: | |
232 | smc_wr_free_link_mem(lnk); | |
2a4c57a9 KG |
233 | clear_llc_lnk: |
234 | smc_llc_link_clear(lnk); | |
f38ba179 UB |
235 | free_lgr: |
236 | kfree(lgr); | |
0cfdd8f9 UB |
237 | out: |
238 | return rc; | |
239 | } | |
240 | ||
3e034725 | 241 | static void smc_buf_unuse(struct smc_connection *conn) |
cd6851f3 | 242 | { |
69cb7dc0 | 243 | if (conn->sndbuf_desc) |
cd6851f3 | 244 | conn->sndbuf_desc->used = 0; |
cd6851f3 | 245 | if (conn->rmb_desc) { |
a6920d1d KG |
246 | if (!conn->rmb_desc->regerr) { |
247 | conn->rmb_desc->reused = 1; | |
248 | conn->rmb_desc->used = 0; | |
a6920d1d KG |
249 | } else { |
250 | /* buf registration failed, reuse not possible */ | |
251 | struct smc_link_group *lgr = conn->lgr; | |
a6920d1d KG |
252 | |
253 | write_lock_bh(&lgr->rmbs_lock); | |
254 | list_del(&conn->rmb_desc->list); | |
255 | write_unlock_bh(&lgr->rmbs_lock); | |
256 | ||
6511aad3 | 257 | smc_buf_free(lgr, true, conn->rmb_desc); |
a6920d1d | 258 | } |
cd6851f3 UB |
259 | } |
260 | } | |
261 | ||
0cfdd8f9 UB |
262 | /* remove a finished connection from its link group */ |
263 | void smc_conn_free(struct smc_connection *conn) | |
264 | { | |
732720fa | 265 | if (!conn->lgr) |
0cfdd8f9 | 266 | return; |
5f08318f | 267 | smc_cdc_tx_dismiss_slots(conn); |
0cfdd8f9 | 268 | smc_lgr_unregister_conn(conn); |
3e034725 | 269 | smc_buf_unuse(conn); |
0cfdd8f9 UB |
270 | } |
271 | ||
272 | static void smc_link_clear(struct smc_link *lnk) | |
273 | { | |
274 | lnk->peer_qpn = 0; | |
2a4c57a9 | 275 | smc_llc_link_clear(lnk); |
bd4ad577 | 276 | smc_ib_modify_qp_reset(lnk); |
f38ba179 | 277 | smc_wr_free_link(lnk); |
bd4ad577 UB |
278 | smc_ib_destroy_queue_pair(lnk); |
279 | smc_ib_dealloc_protection_domain(lnk); | |
f38ba179 | 280 | smc_wr_free_link_mem(lnk); |
0cfdd8f9 UB |
281 | } |
282 | ||
6511aad3 HW |
283 | static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, |
284 | struct smc_buf_desc *buf_desc) | |
cd6851f3 | 285 | { |
6511aad3 HW |
286 | struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; |
287 | ||
3e034725 UB |
288 | if (is_rmb) { |
289 | if (buf_desc->mr_rx[SMC_SINGLE_LINK]) | |
290 | smc_ib_put_memory_region( | |
291 | buf_desc->mr_rx[SMC_SINGLE_LINK]); | |
292 | smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, | |
293 | DMA_FROM_DEVICE); | |
294 | } else { | |
295 | smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, | |
296 | DMA_TO_DEVICE); | |
cd6851f3 | 297 | } |
3e034725 | 298 | sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]); |
2ef4f27a SR |
299 | if (buf_desc->pages) |
300 | __free_pages(buf_desc->pages, buf_desc->order); | |
3e034725 | 301 | kfree(buf_desc); |
cd6851f3 UB |
302 | } |
303 | ||
3e034725 | 304 | static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) |
cd6851f3 | 305 | { |
3e034725 UB |
306 | struct smc_buf_desc *buf_desc, *bf_desc; |
307 | struct list_head *buf_list; | |
cd6851f3 UB |
308 | int i; |
309 | ||
310 | for (i = 0; i < SMC_RMBE_SIZES; i++) { | |
3e034725 UB |
311 | if (is_rmb) |
312 | buf_list = &lgr->rmbs[i]; | |
313 | else | |
314 | buf_list = &lgr->sndbufs[i]; | |
315 | list_for_each_entry_safe(buf_desc, bf_desc, buf_list, | |
cd6851f3 | 316 | list) { |
3e034725 | 317 | list_del(&buf_desc->list); |
6511aad3 | 318 | smc_buf_free(lgr, is_rmb, buf_desc); |
cd6851f3 UB |
319 | } |
320 | } | |
321 | } | |
322 | ||
3e034725 UB |
323 | static void smc_lgr_free_bufs(struct smc_link_group *lgr) |
324 | { | |
325 | /* free send buffers */ | |
326 | __smc_lgr_free_bufs(lgr, false); | |
327 | /* free rmbs */ | |
328 | __smc_lgr_free_bufs(lgr, true); | |
329 | } | |
330 | ||
0cfdd8f9 UB |
331 | /* remove a link group */ |
332 | void smc_lgr_free(struct smc_link_group *lgr) | |
333 | { | |
3e034725 | 334 | smc_lgr_free_bufs(lgr); |
0cfdd8f9 UB |
335 | smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); |
336 | kfree(lgr); | |
337 | } | |
338 | ||
9651b934 KG |
339 | void smc_lgr_forget(struct smc_link_group *lgr) |
340 | { | |
341 | spin_lock_bh(&smc_lgr_list.lock); | |
342 | /* do not use this link group for new connections */ | |
343 | if (!list_empty(&lgr->list)) | |
344 | list_del_init(&lgr->list); | |
345 | spin_unlock_bh(&smc_lgr_list.lock); | |
346 | } | |
347 | ||
0cfdd8f9 | 348 | /* terminate linkgroup abnormally */ |
b9f227c3 | 349 | static void __smc_lgr_terminate(struct smc_link_group *lgr) |
0cfdd8f9 UB |
350 | { |
351 | struct smc_connection *conn; | |
b38d7324 | 352 | struct smc_sock *smc; |
0cfdd8f9 UB |
353 | struct rb_node *node; |
354 | ||
517c300e KG |
355 | if (lgr->terminating) |
356 | return; /* lgr already terminating */ | |
357 | lgr->terminating = 1; | |
b9f227c3 HW |
358 | if (!list_empty(&lgr->list)) /* forget lgr */ |
359 | list_del_init(&lgr->list); | |
877ae5be | 360 | smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); |
0cfdd8f9 UB |
361 | |
362 | write_lock_bh(&lgr->conns_lock); | |
363 | node = rb_first(&lgr->conns_all); | |
364 | while (node) { | |
365 | conn = rb_entry(node, struct smc_connection, alert_node); | |
b38d7324 | 366 | smc = container_of(conn, struct smc_sock, conn); |
51f1de79 | 367 | sock_hold(&smc->sk); /* sock_put in close work */ |
732720fa | 368 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
0cfdd8f9 | 369 | __smc_lgr_unregister_conn(conn); |
732720fa | 370 | write_unlock_bh(&lgr->conns_lock); |
51f1de79 UB |
371 | if (!schedule_work(&conn->close_work)) |
372 | sock_put(&smc->sk); | |
732720fa | 373 | write_lock_bh(&lgr->conns_lock); |
0cfdd8f9 UB |
374 | node = rb_first(&lgr->conns_all); |
375 | } | |
376 | write_unlock_bh(&lgr->conns_lock); | |
732720fa | 377 | wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); |
97cdbc42 | 378 | smc_lgr_schedule_free_work(lgr); |
0cfdd8f9 UB |
379 | } |
380 | ||
b9f227c3 HW |
381 | void smc_lgr_terminate(struct smc_link_group *lgr) |
382 | { | |
383 | spin_lock_bh(&smc_lgr_list.lock); | |
384 | __smc_lgr_terminate(lgr); | |
385 | spin_unlock_bh(&smc_lgr_list.lock); | |
386 | } | |
387 | ||
9fda3510 HW |
388 | /* Called when IB port is terminated */ |
389 | void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) | |
390 | { | |
391 | struct smc_link_group *lgr, *l; | |
392 | ||
b9f227c3 | 393 | spin_lock_bh(&smc_lgr_list.lock); |
9fda3510 HW |
394 | list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { |
395 | if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && | |
396 | lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) | |
b9f227c3 | 397 | __smc_lgr_terminate(lgr); |
9fda3510 | 398 | } |
b9f227c3 | 399 | spin_unlock_bh(&smc_lgr_list.lock); |
9fda3510 HW |
400 | } |
401 | ||
0cfdd8f9 UB |
402 | /* Determine vlan of internal TCP socket. |
403 | * @vlan_id: address to store the determined vlan id into | |
404 | */ | |
405 | static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) | |
406 | { | |
407 | struct dst_entry *dst = sk_dst_get(clcsock->sk); | |
cb9d43f6 UB |
408 | struct net_device *ndev; |
409 | int i, nest_lvl, rc = 0; | |
0cfdd8f9 UB |
410 | |
411 | *vlan_id = 0; | |
412 | if (!dst) { | |
413 | rc = -ENOTCONN; | |
414 | goto out; | |
415 | } | |
416 | if (!dst->dev) { | |
417 | rc = -ENODEV; | |
418 | goto out_rel; | |
419 | } | |
420 | ||
cb9d43f6 UB |
421 | ndev = dst->dev; |
422 | if (is_vlan_dev(ndev)) { | |
423 | *vlan_id = vlan_dev_vlan_id(ndev); | |
424 | goto out_rel; | |
425 | } | |
426 | ||
427 | rtnl_lock(); | |
428 | nest_lvl = dev_get_nest_level(ndev); | |
429 | for (i = 0; i < nest_lvl; i++) { | |
430 | struct list_head *lower = &ndev->adj_list.lower; | |
431 | ||
432 | if (list_empty(lower)) | |
433 | break; | |
434 | lower = lower->next; | |
435 | ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower); | |
436 | if (is_vlan_dev(ndev)) { | |
437 | *vlan_id = vlan_dev_vlan_id(ndev); | |
438 | break; | |
439 | } | |
440 | } | |
441 | rtnl_unlock(); | |
0cfdd8f9 UB |
442 | |
443 | out_rel: | |
444 | dst_release(dst); | |
445 | out: | |
446 | return rc; | |
447 | } | |
448 | ||
449 | /* determine the link gid matching the vlan id of the link group */ | |
450 | static int smc_link_determine_gid(struct smc_link_group *lgr) | |
451 | { | |
452 | struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; | |
453 | struct ib_gid_attr gattr; | |
454 | union ib_gid gid; | |
455 | int i; | |
456 | ||
457 | if (!lgr->vlan_id) { | |
458 | lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1]; | |
459 | return 0; | |
460 | } | |
461 | ||
462 | for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; | |
463 | i++) { | |
464 | if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, | |
465 | &gattr)) | |
466 | continue; | |
43e2ada3 UB |
467 | if (gattr.ndev) { |
468 | if (is_vlan_dev(gattr.ndev) && | |
469 | vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) { | |
470 | lnk->gid = gid; | |
471 | dev_put(gattr.ndev); | |
472 | return 0; | |
473 | } | |
474 | dev_put(gattr.ndev); | |
0cfdd8f9 UB |
475 | } |
476 | } | |
477 | return -ENODEV; | |
478 | } | |
479 | ||
480 | /* create a new SMC connection (and a new link group if necessary) */ | |
be6d467b | 481 | int smc_conn_create(struct smc_sock *smc, |
0cfdd8f9 UB |
482 | struct smc_ib_device *smcibdev, u8 ibport, |
483 | struct smc_clc_msg_local *lcl, int srv_first_contact) | |
484 | { | |
485 | struct smc_connection *conn = &smc->conn; | |
8437bda0 | 486 | int local_contact = SMC_FIRST_CONTACT; |
0cfdd8f9 UB |
487 | struct smc_link_group *lgr; |
488 | unsigned short vlan_id; | |
489 | enum smc_lgr_role role; | |
0cfdd8f9 UB |
490 | int rc = 0; |
491 | ||
492 | role = smc->listen_smc ? SMC_SERV : SMC_CLNT; | |
493 | rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id); | |
494 | if (rc) | |
495 | return rc; | |
496 | ||
497 | if ((role == SMC_CLNT) && srv_first_contact) | |
498 | /* create new link group as well */ | |
499 | goto create; | |
500 | ||
501 | /* determine if an existing link group can be reused */ | |
502 | spin_lock_bh(&smc_lgr_list.lock); | |
503 | list_for_each_entry(lgr, &smc_lgr_list.list, list) { | |
504 | write_lock_bh(&lgr->conns_lock); | |
505 | if (!memcmp(lgr->peer_systemid, lcl->id_for_peer, | |
506 | SMC_SYSTEMID_LEN) && | |
507 | !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, | |
508 | SMC_GID_SIZE) && | |
509 | !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, | |
510 | sizeof(lcl->mac)) && | |
511 | !lgr->sync_err && | |
512 | (lgr->role == role) && | |
cd6851f3 UB |
513 | (lgr->vlan_id == vlan_id) && |
514 | ((role == SMC_CLNT) || | |
515 | (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) { | |
0cfdd8f9 UB |
516 | /* link group found */ |
517 | local_contact = SMC_REUSE_CONTACT; | |
518 | conn->lgr = lgr; | |
519 | smc_lgr_register_conn(conn); /* add smc conn to lgr */ | |
520 | write_unlock_bh(&lgr->conns_lock); | |
521 | break; | |
522 | } | |
523 | write_unlock_bh(&lgr->conns_lock); | |
524 | } | |
525 | spin_unlock_bh(&smc_lgr_list.lock); | |
526 | ||
527 | if (role == SMC_CLNT && !srv_first_contact && | |
528 | (local_contact == SMC_FIRST_CONTACT)) { | |
529 | /* Server reuses a link group, but Client wants to start | |
530 | * a new one | |
531 | * send out_of_sync decline, reason synchr. error | |
532 | */ | |
533 | return -ENOLINK; | |
534 | } | |
535 | ||
536 | create: | |
537 | if (local_contact == SMC_FIRST_CONTACT) { | |
be6d467b | 538 | rc = smc_lgr_create(smc, smcibdev, ibport, |
0cfdd8f9 UB |
539 | lcl->id_for_peer, vlan_id); |
540 | if (rc) | |
541 | goto out; | |
542 | smc_lgr_register_conn(conn); /* add smc conn to lgr */ | |
543 | rc = smc_link_determine_gid(conn->lgr); | |
544 | } | |
5f08318f | 545 | conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; |
cbba07a7 | 546 | conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; |
de8474eb | 547 | conn->urg_state = SMC_URG_READ; |
5f08318f UB |
548 | #ifndef KERNEL_HAS_ATOMIC64 |
549 | spin_lock_init(&conn->acurs_lock); | |
550 | #endif | |
0cfdd8f9 UB |
551 | |
552 | out: | |
553 | return rc ? rc : local_contact; | |
554 | } | |
cd6851f3 | 555 | |
2f6becaf HW |
556 | /* convert the RMB size into the compressed notation - minimum 16K. |
557 | * In contrast to plain ilog2, this rounds towards the next power of 2, | |
558 | * so the socket application gets at least its desired sndbuf / rcvbuf size. | |
559 | */ | |
560 | static u8 smc_compress_bufsize(int size) | |
561 | { | |
562 | u8 compressed; | |
563 | ||
564 | if (size <= SMC_BUF_MIN_SIZE) | |
565 | return 0; | |
566 | ||
567 | size = (size - 1) >> 14; | |
568 | compressed = ilog2(size) + 1; | |
569 | if (compressed >= SMC_RMBE_SIZES) | |
570 | compressed = SMC_RMBE_SIZES - 1; | |
571 | return compressed; | |
572 | } | |
573 | ||
574 | /* convert the RMB size from compressed notation into integer */ | |
575 | int smc_uncompress_bufsize(u8 compressed) | |
576 | { | |
577 | u32 size; | |
578 | ||
579 | size = 0x00000001 << (((int)compressed) + 14); | |
580 | return (int)size; | |
581 | } | |
582 | ||
3e034725 UB |
583 | /* try to reuse a sndbuf or rmb description slot for a certain |
584 | * buffer size; if not available, return NULL | |
cd6851f3 | 585 | */ |
8437bda0 HW |
586 | static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, |
587 | rwlock_t *lock, | |
588 | struct list_head *buf_list) | |
cd6851f3 | 589 | { |
3e034725 | 590 | struct smc_buf_desc *buf_slot; |
cd6851f3 | 591 | |
3e034725 UB |
592 | read_lock_bh(lock); |
593 | list_for_each_entry(buf_slot, buf_list, list) { | |
594 | if (cmpxchg(&buf_slot->used, 0, 1) == 0) { | |
595 | read_unlock_bh(lock); | |
596 | return buf_slot; | |
cd6851f3 UB |
597 | } |
598 | } | |
3e034725 | 599 | read_unlock_bh(lock); |
cd6851f3 UB |
600 | return NULL; |
601 | } | |
602 | ||
952310cc UB |
603 | /* one of the conditions for announcing a receiver's current window size is |
604 | * that it "results in a minimum increase in the window size of 10% of the | |
605 | * receive buffer space" [RFC7609] | |
606 | */ | |
607 | static inline int smc_rmb_wnd_update_limit(int rmbe_size) | |
608 | { | |
609 | return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); | |
610 | } | |
611 | ||
b33982c3 UB |
612 | static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, |
613 | bool is_rmb, int bufsize) | |
614 | { | |
615 | struct smc_buf_desc *buf_desc; | |
616 | struct smc_link *lnk; | |
617 | int rc; | |
618 | ||
619 | /* try to alloc a new buffer */ | |
620 | buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); | |
621 | if (!buf_desc) | |
622 | return ERR_PTR(-ENOMEM); | |
623 | ||
2ef4f27a SR |
624 | buf_desc->order = get_order(bufsize); |
625 | buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | | |
626 | __GFP_NOMEMALLOC | __GFP_COMP | | |
627 | __GFP_NORETRY | __GFP_ZERO, | |
628 | buf_desc->order); | |
629 | if (!buf_desc->pages) { | |
b33982c3 UB |
630 | kfree(buf_desc); |
631 | return ERR_PTR(-EAGAIN); | |
632 | } | |
2ef4f27a | 633 | buf_desc->cpu_addr = (void *)page_address(buf_desc->pages); |
b33982c3 UB |
634 | |
635 | /* build the sg table from the pages */ | |
636 | lnk = &lgr->lnk[SMC_SINGLE_LINK]; | |
637 | rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1, | |
638 | GFP_KERNEL); | |
639 | if (rc) { | |
6511aad3 | 640 | smc_buf_free(lgr, is_rmb, buf_desc); |
b33982c3 UB |
641 | return ERR_PTR(rc); |
642 | } | |
643 | sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl, | |
644 | buf_desc->cpu_addr, bufsize); | |
645 | ||
646 | /* map sg table to DMA address */ | |
647 | rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc, | |
648 | is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); | |
649 | /* SMC protocol depends on mapping to one DMA address only */ | |
650 | if (rc != 1) { | |
6511aad3 | 651 | smc_buf_free(lgr, is_rmb, buf_desc); |
b33982c3 UB |
652 | return ERR_PTR(-EAGAIN); |
653 | } | |
654 | ||
655 | /* create a new memory region for the RMB */ | |
656 | if (is_rmb) { | |
657 | rc = smc_ib_get_memory_region(lnk->roce_pd, | |
658 | IB_ACCESS_REMOTE_WRITE | | |
659 | IB_ACCESS_LOCAL_WRITE, | |
660 | buf_desc); | |
661 | if (rc) { | |
6511aad3 | 662 | smc_buf_free(lgr, is_rmb, buf_desc); |
b33982c3 UB |
663 | return ERR_PTR(rc); |
664 | } | |
665 | } | |
666 | ||
69cb7dc0 | 667 | buf_desc->len = bufsize; |
b33982c3 UB |
668 | return buf_desc; |
669 | } | |
670 | ||
3e034725 | 671 | static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) |
cd6851f3 | 672 | { |
8437bda0 | 673 | struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); |
cd6851f3 UB |
674 | struct smc_connection *conn = &smc->conn; |
675 | struct smc_link_group *lgr = conn->lgr; | |
3e034725 | 676 | struct list_head *buf_list; |
c45abf31 | 677 | int bufsize, bufsize_short; |
3e034725 UB |
678 | int sk_buf_size; |
679 | rwlock_t *lock; | |
cd6851f3 | 680 | |
3e034725 UB |
681 | if (is_rmb) |
682 | /* use socket recv buffer size (w/o overhead) as start value */ | |
683 | sk_buf_size = smc->sk.sk_rcvbuf / 2; | |
684 | else | |
685 | /* use socket send buffer size (w/o overhead) as start value */ | |
686 | sk_buf_size = smc->sk.sk_sndbuf / 2; | |
687 | ||
4e1061f4 | 688 | for (bufsize_short = smc_compress_bufsize(sk_buf_size); |
c45abf31 | 689 | bufsize_short >= 0; bufsize_short--) { |
9d8fb617 | 690 | |
3e034725 UB |
691 | if (is_rmb) { |
692 | lock = &lgr->rmbs_lock; | |
693 | buf_list = &lgr->rmbs[bufsize_short]; | |
694 | } else { | |
695 | lock = &lgr->sndbufs_lock; | |
696 | buf_list = &lgr->sndbufs[bufsize_short]; | |
9d8fb617 | 697 | } |
c45abf31 | 698 | bufsize = smc_uncompress_bufsize(bufsize_short); |
a3fe3d01 UB |
699 | if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) |
700 | continue; | |
701 | ||
3e034725 | 702 | /* check for reusable slot in the link group */ |
8437bda0 | 703 | buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list); |
3e034725 UB |
704 | if (buf_desc) { |
705 | memset(buf_desc->cpu_addr, 0, bufsize); | |
cd6851f3 UB |
706 | break; /* found reusable slot */ |
707 | } | |
a3fe3d01 | 708 | |
b33982c3 UB |
709 | buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize); |
710 | if (PTR_ERR(buf_desc) == -ENOMEM) | |
711 | break; | |
712 | if (IS_ERR(buf_desc)) | |
a3fe3d01 | 713 | continue; |
897e1c24 | 714 | |
3e034725 UB |
715 | buf_desc->used = 1; |
716 | write_lock_bh(lock); | |
717 | list_add(&buf_desc->list, buf_list); | |
718 | write_unlock_bh(lock); | |
719 | break; /* found */ | |
cd6851f3 | 720 | } |
3e034725 | 721 | |
b33982c3 | 722 | if (IS_ERR(buf_desc)) |
3e034725 UB |
723 | return -ENOMEM; |
724 | ||
725 | if (is_rmb) { | |
726 | conn->rmb_desc = buf_desc; | |
c45abf31 UB |
727 | conn->rmbe_size_short = bufsize_short; |
728 | smc->sk.sk_rcvbuf = bufsize * 2; | |
5f08318f | 729 | atomic_set(&conn->bytes_to_rcv, 0); |
c45abf31 | 730 | conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); |
cd6851f3 | 731 | } else { |
3e034725 | 732 | conn->sndbuf_desc = buf_desc; |
3e034725 UB |
733 | smc->sk.sk_sndbuf = bufsize * 2; |
734 | atomic_set(&conn->sndbuf_space, bufsize); | |
cd6851f3 | 735 | } |
3e034725 UB |
736 | return 0; |
737 | } | |
738 | ||
10428dd8 UB |
739 | void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) |
740 | { | |
741 | struct smc_link_group *lgr = conn->lgr; | |
742 | ||
743 | smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, | |
744 | conn->sndbuf_desc, DMA_TO_DEVICE); | |
745 | } | |
746 | ||
747 | void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) | |
748 | { | |
749 | struct smc_link_group *lgr = conn->lgr; | |
750 | ||
751 | smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, | |
752 | conn->sndbuf_desc, DMA_TO_DEVICE); | |
753 | } | |
754 | ||
755 | void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) | |
756 | { | |
757 | struct smc_link_group *lgr = conn->lgr; | |
758 | ||
759 | smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, | |
760 | conn->rmb_desc, DMA_FROM_DEVICE); | |
761 | } | |
762 | ||
763 | void smc_rmb_sync_sg_for_device(struct smc_connection *conn) | |
764 | { | |
765 | struct smc_link_group *lgr = conn->lgr; | |
766 | ||
767 | smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, | |
768 | conn->rmb_desc, DMA_FROM_DEVICE); | |
769 | } | |
770 | ||
3e034725 UB |
771 | /* create the send and receive buffer for an SMC socket; |
772 | * receive buffers are called RMBs; | |
773 | * (even though the SMC protocol allows more than one RMB-element per RMB, | |
774 | * the Linux implementation uses just one RMB-element per RMB, i.e. uses an | |
775 | * extra RMB for every connection in a link group | |
776 | */ | |
777 | int smc_buf_create(struct smc_sock *smc) | |
778 | { | |
779 | int rc; | |
780 | ||
781 | /* create send buffer */ | |
782 | rc = __smc_buf_create(smc, false); | |
783 | if (rc) | |
784 | return rc; | |
785 | /* create rmb */ | |
786 | rc = __smc_buf_create(smc, true); | |
787 | if (rc) | |
6511aad3 | 788 | smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); |
3e034725 | 789 | return rc; |
cd6851f3 | 790 | } |
bd4ad577 UB |
791 | |
792 | static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) | |
793 | { | |
794 | int i; | |
795 | ||
796 | for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) { | |
797 | if (!test_and_set_bit(i, lgr->rtokens_used_mask)) | |
798 | return i; | |
799 | } | |
800 | return -ENOSPC; | |
801 | } | |
802 | ||
4ed75de5 KG |
803 | /* add a new rtoken from peer */ |
804 | int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey) | |
bd4ad577 | 805 | { |
4ed75de5 KG |
806 | u64 dma_addr = be64_to_cpu(nw_vaddr); |
807 | u32 rkey = ntohl(nw_rkey); | |
bd4ad577 UB |
808 | int i; |
809 | ||
810 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { | |
811 | if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && | |
263eec9b | 812 | (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && |
bd4ad577 | 813 | test_bit(i, lgr->rtokens_used_mask)) { |
4ed75de5 KG |
814 | /* already in list */ |
815 | return i; | |
816 | } | |
817 | } | |
818 | i = smc_rmb_reserve_rtoken_idx(lgr); | |
819 | if (i < 0) | |
820 | return i; | |
821 | lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey; | |
822 | lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr; | |
823 | return i; | |
824 | } | |
825 | ||
826 | /* delete an rtoken */ | |
827 | int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey) | |
828 | { | |
829 | u32 rkey = ntohl(nw_rkey); | |
830 | int i; | |
831 | ||
832 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { | |
833 | if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey && | |
834 | test_bit(i, lgr->rtokens_used_mask)) { | |
835 | lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0; | |
836 | lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0; | |
837 | ||
838 | clear_bit(i, lgr->rtokens_used_mask); | |
bd4ad577 UB |
839 | return 0; |
840 | } | |
841 | } | |
4ed75de5 KG |
842 | return -ENOENT; |
843 | } | |
844 | ||
845 | /* save rkey and dma_addr received from peer during clc handshake */ | |
846 | int smc_rmb_rtoken_handling(struct smc_connection *conn, | |
847 | struct smc_clc_msg_accept_confirm *clc) | |
848 | { | |
849 | conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr, | |
850 | clc->rmb_rkey); | |
bd4ad577 UB |
851 | if (conn->rtoken_idx < 0) |
852 | return conn->rtoken_idx; | |
bd4ad577 UB |
853 | return 0; |
854 | } | |
9fda3510 HW |
855 | |
856 | /* Called (from smc_exit) when module is removed */ | |
857 | void smc_core_exit(void) | |
858 | { | |
859 | struct smc_link_group *lgr, *lg; | |
860 | LIST_HEAD(lgr_freeing_list); | |
861 | ||
862 | spin_lock_bh(&smc_lgr_list.lock); | |
863 | if (!list_empty(&smc_lgr_list.list)) | |
864 | list_splice_init(&smc_lgr_list.list, &lgr_freeing_list); | |
865 | spin_unlock_bh(&smc_lgr_list.lock); | |
866 | list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { | |
867 | list_del_init(&lgr->list); | |
868 | smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); | |
869 | cancel_delayed_work_sync(&lgr->free_work); | |
870 | smc_lgr_free(lgr); /* free link group */ | |
871 | } | |
872 | } |