Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
952310cc UB |
2 | /* |
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | |
4 | * | |
5 | * Manage RMBE | |
6 | * copy new RMBE data into user space | |
7 | * | |
8 | * Copyright IBM Corp. 2016 | |
9 | * | |
10 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | |
11 | */ | |
12 | ||
13 | #include <linux/net.h> | |
14 | #include <linux/rcupdate.h> | |
c3edc401 IM |
15 | #include <linux/sched/signal.h> |
16 | ||
952310cc UB |
17 | #include <net/sock.h> |
18 | ||
19 | #include "smc.h" | |
20 | #include "smc_core.h" | |
21 | #include "smc_cdc.h" | |
22 | #include "smc_tx.h" /* smc_tx_consumer_update() */ | |
23 | #include "smc_rx.h" | |
24 | ||
b51fa1b1 | 25 | /* callback implementation to wakeup consumers blocked with smc_rx_wait(). |
952310cc UB |
26 | * indirectly called by smc_cdc_msg_recv_action(). |
27 | */ | |
b51fa1b1 | 28 | static void smc_rx_wake_up(struct sock *sk) |
952310cc UB |
29 | { |
30 | struct socket_wq *wq; | |
31 | ||
32 | /* derived from sock_def_readable() */ | |
33 | /* called already in smc_listen_work() */ | |
34 | rcu_read_lock(); | |
35 | wq = rcu_dereference(sk->sk_wq); | |
36 | if (skwq_has_sleeper(wq)) | |
a9a08845 LT |
37 | wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | |
38 | EPOLLRDNORM | EPOLLRDBAND); | |
90e9517e | 39 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); |
952310cc UB |
40 | if ((sk->sk_shutdown == SHUTDOWN_MASK) || |
41 | (sk->sk_state == SMC_CLOSED)) | |
42 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); | |
952310cc UB |
43 | rcu_read_unlock(); |
44 | } | |
45 | ||
9014db20 SR |
46 | /* Update consumer cursor |
47 | * @conn connection to update | |
48 | * @cons consumer cursor | |
49 | * @len number of Bytes consumed | |
de8474eb SR |
50 | * Returns: |
51 | * 1 if we should end our receive, 0 otherwise | |
9014db20 | 52 | */ |
de8474eb SR |
53 | static int smc_rx_update_consumer(struct smc_sock *smc, |
54 | union smc_host_cursor cons, size_t len) | |
9014db20 | 55 | { |
de8474eb SR |
56 | struct smc_connection *conn = &smc->conn; |
57 | struct sock *sk = &smc->sk; | |
58 | bool force = false; | |
59 | int diff, rc = 0; | |
60 | ||
69cb7dc0 | 61 | smc_curs_add(conn->rmb_desc->len, &cons, len); |
de8474eb SR |
62 | |
63 | /* did we process urgent data? */ | |
64 | if (conn->urg_state == SMC_URG_VALID || conn->urg_rx_skip_pend) { | |
65 | diff = smc_curs_comp(conn->rmb_desc->len, &cons, | |
66 | &conn->urg_curs); | |
67 | if (sock_flag(sk, SOCK_URGINLINE)) { | |
68 | if (diff == 0) { | |
69 | force = true; | |
70 | rc = 1; | |
71 | conn->urg_state = SMC_URG_READ; | |
72 | } | |
73 | } else { | |
74 | if (diff == 1) { | |
75 | /* skip urgent byte */ | |
76 | force = true; | |
77 | smc_curs_add(conn->rmb_desc->len, &cons, 1); | |
78 | conn->urg_rx_skip_pend = false; | |
79 | } else if (diff < -1) | |
80 | /* we read past urgent byte */ | |
81 | conn->urg_state = SMC_URG_READ; | |
82 | } | |
83 | } | |
84 | ||
bac6de7b | 85 | smc_curs_copy(&conn->local_tx_ctrl.cons, &cons, conn); |
de8474eb | 86 | |
9014db20 SR |
87 | /* send consumer cursor update if required */ |
88 | /* similar to advertising new TCP rcv_wnd if required */ | |
de8474eb SR |
89 | smc_tx_consumer_update(conn, force); |
90 | ||
91 | return rc; | |
92 | } | |
93 | ||
94 | static void smc_rx_update_cons(struct smc_sock *smc, size_t len) | |
95 | { | |
96 | struct smc_connection *conn = &smc->conn; | |
97 | union smc_host_cursor cons; | |
98 | ||
bac6de7b | 99 | smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); |
de8474eb | 100 | smc_rx_update_consumer(smc, cons, len); |
9014db20 SR |
101 | } |
102 | ||
/* Bookkeeping attached to a spliced pipe buffer (stored in its ->private
 * field by smc_rx_splice() and consumed by smc_rx_pipe_buf_release()).
 */
struct smc_spd_priv {
	struct smc_sock	*smc;	/* socket the spliced bytes belong to */
	size_t		len;	/* number of bytes handed to the pipe */
};
107 | ||
108 | static void smc_rx_pipe_buf_release(struct pipe_inode_info *pipe, | |
109 | struct pipe_buffer *buf) | |
110 | { | |
111 | struct smc_spd_priv *priv = (struct smc_spd_priv *)buf->private; | |
112 | struct smc_sock *smc = priv->smc; | |
113 | struct smc_connection *conn; | |
9014db20 SR |
114 | struct sock *sk = &smc->sk; |
115 | ||
116 | if (sk->sk_state == SMC_CLOSED || | |
117 | sk->sk_state == SMC_PEERFINCLOSEWAIT || | |
118 | sk->sk_state == SMC_APPFINCLOSEWAIT) | |
119 | goto out; | |
120 | conn = &smc->conn; | |
121 | lock_sock(sk); | |
de8474eb | 122 | smc_rx_update_cons(smc, priv->len); |
9014db20 SR |
123 | release_sock(sk); |
124 | if (atomic_sub_and_test(priv->len, &conn->splice_pending)) | |
125 | smc_rx_wake_up(sk); | |
126 | out: | |
127 | kfree(priv); | |
128 | put_page(buf->page); | |
129 | sock_put(sk); | |
130 | } | |
131 | ||
/* .steal handler of smc_pipe_ops: unconditionally returns 1.
 * NOTE(review): presumably a nonzero result tells the pipe code that the
 * page cannot be taken over - confirm against pipe_buf_operations.
 */
static int smc_rx_pipe_buf_nosteal(struct pipe_inode_info *pipe,
				   struct pipe_buffer *buf)
{
	/* neither argument is inspected */
	return 1;
}
137 | ||
138 | static const struct pipe_buf_operations smc_pipe_ops = { | |
9014db20 SR |
139 | .confirm = generic_pipe_buf_confirm, |
140 | .release = smc_rx_pipe_buf_release, | |
141 | .steal = smc_rx_pipe_buf_nosteal, | |
142 | .get = generic_pipe_buf_get | |
143 | }; | |
144 | ||
145 | static void smc_rx_spd_release(struct splice_pipe_desc *spd, | |
146 | unsigned int i) | |
147 | { | |
148 | put_page(spd->pages[i]); | |
149 | } | |
150 | ||
151 | static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len, | |
152 | struct smc_sock *smc) | |
153 | { | |
154 | struct splice_pipe_desc spd; | |
155 | struct partial_page partial; | |
156 | struct smc_spd_priv *priv; | |
9014db20 SR |
157 | int bytes; |
158 | ||
9014db20 SR |
159 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); |
160 | if (!priv) | |
161 | return -ENOMEM; | |
162 | priv->len = len; | |
163 | priv->smc = smc; | |
164 | partial.offset = src - (char *)smc->conn.rmb_desc->cpu_addr; | |
165 | partial.len = len; | |
166 | partial.private = (unsigned long)priv; | |
167 | ||
168 | spd.nr_pages_max = 1; | |
169 | spd.nr_pages = 1; | |
48bf5231 | 170 | spd.pages = &smc->conn.rmb_desc->pages; |
9014db20 SR |
171 | spd.partial = &partial; |
172 | spd.ops = &smc_pipe_ops; | |
173 | spd.spd_release = smc_rx_spd_release; | |
174 | ||
175 | bytes = splice_to_pipe(pipe, &spd); | |
176 | if (bytes > 0) { | |
177 | sock_hold(&smc->sk); | |
178 | get_page(smc->conn.rmb_desc->pages); | |
179 | atomic_add(bytes, &smc->conn.splice_pending); | |
180 | } | |
181 | ||
182 | return bytes; | |
183 | } | |
184 | ||
185 | static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn) | |
186 | { | |
187 | return atomic_read(&conn->bytes_to_rcv) && | |
188 | !atomic_read(&conn->splice_pending); | |
189 | } | |
190 | ||
952310cc UB |
191 | /* blocks rcvbuf consumer until >=len bytes available or timeout or interrupted |
192 | * @smc smc socket | |
193 | * @timeo pointer to max seconds to wait, pointer to value 0 for no timeout | |
b51fa1b1 | 194 | * @fcrit add'l criterion to evaluate as function pointer |
952310cc UB |
195 | * Returns: |
196 | * 1 if at least 1 byte available in rcvbuf or if socket error/shutdown. | |
197 | * 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted). | |
198 | */ | |
b51fa1b1 SR |
199 | int smc_rx_wait(struct smc_sock *smc, long *timeo, |
200 | int (*fcrit)(struct smc_connection *conn)) | |
952310cc UB |
201 | { |
202 | DEFINE_WAIT_FUNC(wait, woken_wake_function); | |
203 | struct smc_connection *conn = &smc->conn; | |
204 | struct sock *sk = &smc->sk; | |
205 | int rc; | |
206 | ||
b51fa1b1 | 207 | if (fcrit(conn)) |
952310cc UB |
208 | return 1; |
209 | sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); | |
210 | add_wait_queue(sk_sleep(sk), &wait); | |
211 | rc = sk_wait_event(sk, timeo, | |
212 | sk->sk_err || | |
213 | sk->sk_shutdown & RCV_SHUTDOWN || | |
b51fa1b1 | 214 | fcrit(conn) || |
952310cc UB |
215 | smc_cdc_rxed_any_close_or_senddone(conn), |
216 | &wait); | |
217 | remove_wait_queue(sk_sleep(sk), &wait); | |
218 | sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); | |
219 | return rc; | |
220 | } | |
221 | ||
de8474eb SR |
222 | static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len, |
223 | int flags) | |
224 | { | |
225 | struct smc_connection *conn = &smc->conn; | |
226 | union smc_host_cursor cons; | |
227 | struct sock *sk = &smc->sk; | |
228 | int rc = 0; | |
229 | ||
230 | if (sock_flag(sk, SOCK_URGINLINE) || | |
231 | !(conn->urg_state == SMC_URG_VALID) || | |
232 | conn->urg_state == SMC_URG_READ) | |
233 | return -EINVAL; | |
234 | ||
235 | if (conn->urg_state == SMC_URG_VALID) { | |
236 | if (!(flags & MSG_PEEK)) | |
237 | smc->conn.urg_state = SMC_URG_READ; | |
238 | msg->msg_flags |= MSG_OOB; | |
239 | if (len > 0) { | |
240 | if (!(flags & MSG_TRUNC)) | |
241 | rc = memcpy_to_msg(msg, &conn->urg_rx_byte, 1); | |
242 | len = 1; | |
bac6de7b | 243 | smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); |
de8474eb SR |
244 | if (smc_curs_diff(conn->rmb_desc->len, &cons, |
245 | &conn->urg_curs) > 1) | |
246 | conn->urg_rx_skip_pend = true; | |
247 | /* Urgent Byte was already accounted for, but trigger | |
248 | * skipping the urgent byte in non-inline case | |
249 | */ | |
250 | if (!(flags & MSG_PEEK)) | |
251 | smc_rx_update_consumer(smc, cons, 0); | |
252 | } else { | |
253 | msg->msg_flags |= MSG_TRUNC; | |
254 | } | |
255 | ||
256 | return rc ? -EFAULT : len; | |
257 | } | |
258 | ||
259 | if (sk->sk_state == SMC_CLOSED || sk->sk_shutdown & RCV_SHUTDOWN) | |
260 | return 0; | |
261 | ||
262 | return -EAGAIN; | |
263 | } | |
264 | ||
9014db20 SR |
265 | /* smc_rx_recvmsg - receive data from RMBE |
266 | * @msg: copy data to receive buffer | |
267 | * @pipe: copy data to pipe if set - indicates splice() call | |
268 | * | |
269 | * rcvbuf consumer: main API called by socket layer. | |
270 | * Called under sk lock. | |
952310cc | 271 | */ |
9014db20 SR |
272 | int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, |
273 | struct pipe_inode_info *pipe, size_t len, int flags) | |
952310cc UB |
274 | { |
275 | size_t copylen, read_done = 0, read_remaining = len; | |
276 | size_t chunk_len, chunk_off, chunk_len_sum; | |
277 | struct smc_connection *conn = &smc->conn; | |
9014db20 | 278 | int (*func)(struct smc_connection *conn); |
952310cc UB |
279 | union smc_host_cursor cons; |
280 | int readable, chunk; | |
281 | char *rcvbuf_base; | |
282 | struct sock *sk; | |
9014db20 | 283 | int splbytes; |
952310cc UB |
284 | long timeo; |
285 | int target; /* Read at least these many bytes */ | |
286 | int rc; | |
287 | ||
288 | if (unlikely(flags & MSG_ERRQUEUE)) | |
289 | return -EINVAL; /* future work for sk.sk_family == AF_SMC */ | |
952310cc UB |
290 | |
291 | sk = &smc->sk; | |
292 | if (sk->sk_state == SMC_LISTEN) | |
293 | return -ENOTCONN; | |
de8474eb SR |
294 | if (flags & MSG_OOB) |
295 | return smc_rx_recv_urg(smc, msg, len, flags); | |
952310cc UB |
296 | timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); |
297 | target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); | |
298 | ||
952310cc | 299 | /* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */ |
be244f28 | 300 | rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr; |
952310cc UB |
301 | |
302 | do { /* while (read_remaining) */ | |
9014db20 | 303 | if (read_done >= target || (pipe && read_done)) |
952310cc UB |
304 | break; |
305 | ||
306 | if (atomic_read(&conn->bytes_to_rcv)) | |
307 | goto copy; | |
de8474eb SR |
308 | else if (conn->urg_state == SMC_URG_VALID) |
309 | /* we received a single urgent Byte - skip */ | |
310 | smc_rx_update_cons(smc, 0); | |
952310cc | 311 | |
c8b8ec8e SR |
312 | if (sk->sk_shutdown & RCV_SHUTDOWN || |
313 | smc_cdc_rxed_any_close_or_senddone(conn) || | |
314 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) | |
315 | break; | |
316 | ||
952310cc UB |
317 | if (read_done) { |
318 | if (sk->sk_err || | |
319 | sk->sk_state == SMC_CLOSED || | |
952310cc | 320 | !timeo || |
c8b8ec8e | 321 | signal_pending(current)) |
952310cc UB |
322 | break; |
323 | } else { | |
952310cc UB |
324 | if (sk->sk_err) { |
325 | read_done = sock_error(sk); | |
326 | break; | |
327 | } | |
952310cc UB |
328 | if (sk->sk_state == SMC_CLOSED) { |
329 | if (!sock_flag(sk, SOCK_DONE)) { | |
330 | /* This occurs when user tries to read | |
331 | * from never connected socket. | |
332 | */ | |
333 | read_done = -ENOTCONN; | |
334 | break; | |
335 | } | |
336 | break; | |
337 | } | |
338 | if (signal_pending(current)) { | |
339 | read_done = sock_intr_errno(timeo); | |
340 | break; | |
341 | } | |
846e344e HW |
342 | if (!timeo) |
343 | return -EAGAIN; | |
952310cc UB |
344 | } |
345 | ||
b51fa1b1 SR |
346 | if (!smc_rx_data_available(conn)) { |
347 | smc_rx_wait(smc, &timeo, smc_rx_data_available); | |
952310cc UB |
348 | continue; |
349 | } | |
350 | ||
351 | copy: | |
352 | /* initialize variables for 1st iteration of subsequent loop */ | |
b51fa1b1 | 353 | /* could be just 1 byte, even after waiting on data above */ |
952310cc | 354 | readable = atomic_read(&conn->bytes_to_rcv); |
9014db20 SR |
355 | splbytes = atomic_read(&conn->splice_pending); |
356 | if (!readable || (msg && splbytes)) { | |
357 | if (splbytes) | |
358 | func = smc_rx_data_available_and_no_splice_pend; | |
359 | else | |
360 | func = smc_rx_data_available; | |
361 | smc_rx_wait(smc, &timeo, func); | |
362 | continue; | |
363 | } | |
364 | ||
bac6de7b | 365 | smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); |
9014db20 SR |
366 | /* subsequent splice() calls pick up where previous left */ |
367 | if (splbytes) | |
69cb7dc0 | 368 | smc_curs_add(conn->rmb_desc->len, &cons, splbytes); |
de8474eb SR |
369 | if (conn->urg_state == SMC_URG_VALID && |
370 | sock_flag(&smc->sk, SOCK_URGINLINE) && | |
371 | readable > 1) | |
372 | readable--; /* always stop at urgent Byte */ | |
373 | /* not more than what user space asked for */ | |
374 | copylen = min_t(size_t, read_remaining, readable); | |
952310cc UB |
375 | /* determine chunks where to read from rcvbuf */ |
376 | /* either unwrapped case, or 1st chunk of wrapped case */ | |
69cb7dc0 HW |
377 | chunk_len = min_t(size_t, copylen, conn->rmb_desc->len - |
378 | cons.count); | |
952310cc UB |
379 | chunk_len_sum = chunk_len; |
380 | chunk_off = cons.count; | |
10428dd8 | 381 | smc_rmb_sync_sg_for_cpu(conn); |
952310cc UB |
382 | for (chunk = 0; chunk < 2; chunk++) { |
383 | if (!(flags & MSG_TRUNC)) { | |
9014db20 SR |
384 | if (msg) { |
385 | rc = memcpy_to_msg(msg, rcvbuf_base + | |
386 | chunk_off, | |
387 | chunk_len); | |
388 | } else { | |
389 | rc = smc_rx_splice(pipe, rcvbuf_base + | |
390 | chunk_off, chunk_len, | |
391 | smc); | |
392 | } | |
393 | if (rc < 0) { | |
952310cc UB |
394 | if (!read_done) |
395 | read_done = -EFAULT; | |
10428dd8 | 396 | smc_rmb_sync_sg_for_device(conn); |
952310cc UB |
397 | goto out; |
398 | } | |
399 | } | |
400 | read_remaining -= chunk_len; | |
401 | read_done += chunk_len; | |
402 | ||
403 | if (chunk_len_sum == copylen) | |
404 | break; /* either on 1st or 2nd iteration */ | |
405 | /* prepare next (== 2nd) iteration */ | |
406 | chunk_len = copylen - chunk_len; /* remainder */ | |
407 | chunk_len_sum += chunk_len; | |
408 | chunk_off = 0; /* modulo offset in recv ring buffer */ | |
409 | } | |
10428dd8 | 410 | smc_rmb_sync_sg_for_device(conn); |
952310cc UB |
411 | |
412 | /* update cursors */ | |
413 | if (!(flags & MSG_PEEK)) { | |
952310cc UB |
414 | /* increased in recv tasklet smc_cdc_msg_rcv() */ |
415 | smp_mb__before_atomic(); | |
416 | atomic_sub(copylen, &conn->bytes_to_rcv); | |
69cb7dc0 | 417 | /* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */ |
952310cc | 418 | smp_mb__after_atomic(); |
de8474eb SR |
419 | if (msg && smc_rx_update_consumer(smc, cons, copylen)) |
420 | goto out; | |
952310cc UB |
421 | } |
422 | } while (read_remaining); | |
423 | out: | |
424 | return read_done; | |
425 | } | |
426 | ||
427 | /* Initialize receive properties on connection establishment. NB: not __init! */ | |
428 | void smc_rx_init(struct smc_sock *smc) | |
429 | { | |
b51fa1b1 | 430 | smc->sk.sk_data_ready = smc_rx_wake_up; |
9014db20 | 431 | atomic_set(&smc->conn.splice_pending, 0); |
de8474eb | 432 | smc->conn.urg_state = SMC_URG_READ; |
952310cc | 433 | } |